%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 1996-2019. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% %% %CopyrightEnd% %% %% A string library that works on grapheme clusters, with the exception %% of codepoints of class 'prepend' and non modern (or decomposed) Hangul. %% If these codepoints appear, functions like 'find/2' may return a string %% which starts inside a grapheme cluster. %% These exceptions are made because the codepoints classes are %% seldom used and require that we are able look at previous codepoints in %% the stream and is thus hard to implement effectively. %% %% GC (grapheme cluster) implies that the length of string 'ß↑e̊' is 3 though %% it is represented by the codepoints [223,8593,101,778] or the %% utf8 binary <<195,159,226,134,145,101,204,138>> %% %% And that searching for strings or graphemes finds the correct positions: %% %% find("eeeee̊eee", "e̊") -> "e̊ee".: %% find("1£4e̊abcdef", "e") -> "ef" %% %% Most functions expect all input to be normalized to one form, %% see unicode:characters_to_nfc and unicode:characters_to_nfd functions. %% When appending strings no checking is done to verify that the %% result is valid unicode strings. %% %% The functions may crash for invalid utf-8 input. %% %% Return value should be kept consistent when return type is %% unicode:chardata() i.e. binary input => binary output, %% list input => list output mixed input => mixed output %% -module(string). -export([is_empty/1, length/1, to_graphemes/1, reverse/1, equal/2, equal/3, equal/4, slice/2, slice/3, pad/2, pad/3, pad/4, trim/1, trim/2, trim/3, chomp/1, take/2, take/3, take/4, lexemes/2, nth_lexeme/3, uppercase/1, lowercase/1, titlecase/1,casefold/1, prefix/2, split/2,split/3,replace/3,replace/4, find/2,find/3, next_codepoint/1, next_grapheme/1 ]). -export([to_float/1, to_integer/1]). %% Old (will be deprecated) lists/string API kept for backwards compability -export([len/1, concat/2, % equal/2, (extended in the new api) chr/2,rchr/2,str/2,rstr/2, span/2,cspan/2,substr/2,substr/3, tokens/2, chars/2,chars/3]). -export([copies/2,words/1,words/2,strip/1,strip/2,strip/3, sub_word/2,sub_word/3,left/2,left/3,right/2,right/3, sub_string/2,sub_string/3,centre/2,centre/3, join/2]). -export([to_upper/1, to_lower/1]). %% -import(lists,[member/2]). -compile({no_auto_import,[length/1]}). -compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}). -define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r). -export_type([grapheme_cluster/0]). -type grapheme_cluster() :: char() | [char()]. -type direction() :: 'leading' | 'trailing'. -dialyzer({no_improper_lists, [stack/2, length_b/3]}). %%% BIFs internal (not documented) should not to be used outside of this module %%% May be removed -export([list_to_float/1, list_to_integer/1]). %% Uses bifs: string:list_to_float/1 and string:list_to_integer/1 -spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when String :: string(), Float :: float(), Rest :: string(), Reason :: 'no_float' | 'not_a_list'. list_to_float(_) -> erlang:nif_error(undef). -spec list_to_integer(String) -> {Int, Rest} | {'error', Reason} when String :: string(), Int :: integer(), Rest :: string(), Reason :: 'no_integer' | 'not_a_list'. list_to_integer(_) -> erlang:nif_error(undef). %%% End of BIFs %% Check if string is the empty string -spec is_empty(String::unicode:chardata()) -> boolean(). is_empty([]) -> true; is_empty(<<>>) -> true; is_empty([L|R]) -> is_empty(L) andalso is_empty(R); is_empty(_) -> false. %% Count the number of grapheme clusters in chardata -spec length(String::unicode:chardata()) -> non_neg_integer(). length(<>) -> length_b(Bin, CP1, 0); length(CD) -> length_1(CD, 0). %% Convert a string to a list of grapheme clusters -spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()]. to_graphemes(CD0) -> case unicode_util:gc(CD0) of [GC|CD] -> [GC|to_graphemes(CD)]; [] -> []; {error, Err} -> error({badarg, Err}) end. %% Compare two strings return boolean, assumes that the input are %% normalized to same form, see unicode:characters_to_nfX_xxx(..) -spec equal(A, B) -> boolean() when A::unicode:chardata(), B::unicode:chardata(). equal(A,B) when is_binary(A), is_binary(B) -> A =:= B; equal(A,B) -> equal_1(A,B). %% Compare two strings return boolean, assumes that the input are %% normalized to same form, see unicode:characters_to_nfX_xxx(..) %% does casefold on the fly -spec equal(A, B, IgnoreCase) -> boolean() when A::unicode:chardata(), B::unicode:chardata(), IgnoreCase :: boolean(). equal(A, B, false) -> equal(A,B); equal(A, B, true) -> equal_nocase(A,B). %% Compare two strings return boolean %% if specified does casefold and normalization on the fly -spec equal(A, B, IgnoreCase, Norm) -> boolean() when A :: unicode:chardata(), B :: unicode:chardata(), IgnoreCase :: boolean(), Norm :: 'none' | 'nfc' | 'nfd' | 'nfkc' | 'nfkd'. equal(A, B, Case, none) -> equal(A,B,Case); equal(A, B, false, Norm) -> equal_norm(A, B, Norm); equal(A, B, true, Norm) -> equal_norm_nocase(A, B, Norm). %% Reverse grapheme clusters -spec reverse(String::unicode:chardata()) -> [grapheme_cluster()]. reverse(<>) -> reverse_b(Rest, CP1, []); reverse(CD) -> reverse_1(CD, []). %% Slice a string and return rest of string %% Note: counts grapheme_clusters -spec slice(String, Start) -> Slice when String::unicode:chardata(), Start :: non_neg_integer(), Slice :: unicode:chardata(). slice(CD, N) when is_integer(N), N >= 0 -> case slice_l0(CD, N) of [] when is_binary(CD) -> <<>>; Res -> Res end. -spec slice(String, Start, Length) -> Slice when String::unicode:chardata(), Start :: non_neg_integer(), Length :: 'infinity' | non_neg_integer(), Slice :: unicode:chardata(). slice(CD, N, Length) when is_integer(N), N >= 0, is_integer(Length), Length > 0 -> case slice_l0(CD, N) of [] when is_binary(CD) -> <<>>; L -> slice_trail(L, Length) end; slice(CD, N, infinity) -> case slice_l0(CD, N) of [] when is_binary(CD) -> <<>>; Res -> Res end; slice(CD, _, 0) -> case is_binary(CD) of true -> <<>>; false -> [] end. %% Pad a string to desired length -spec pad(String, Length) -> unicode:charlist() when String ::unicode:chardata(), Length :: integer(). pad(CD, Length) -> pad(CD, Length, trailing, $\s). -spec pad(String, Length, Dir) -> unicode:charlist() when String ::unicode:chardata(), Length :: integer(), Dir :: direction() | 'both'. pad(CD, Length, Dir) -> pad(CD, Length, Dir, $\s). -spec pad(String, Length, Dir, Char) -> unicode:charlist() when String ::unicode:chardata(), Length :: integer(), Dir :: direction() | 'both', Char :: grapheme_cluster(). pad(CD, Length, leading, Char) when is_integer(Length) -> Len = length(CD), [lists:duplicate(max(0, Length-Len), Char), CD]; pad(CD, Length, trailing, Char) when is_integer(Length) -> Len = length(CD), [CD|lists:duplicate(max(0, Length-Len), Char)]; pad(CD, Length, both, Char) when is_integer(Length) -> Len = length(CD), Size = max(0, Length-Len), Pre = lists:duplicate(Size div 2, Char), Post = case Size rem 2 of 1 -> [Char]; _ -> [] end, [Pre, CD, Pre|Post]. %% Strip characters from whitespace or Separator in Direction -spec trim(String) -> unicode:chardata() when String :: unicode:chardata(). trim(Str) -> trim(Str, both, unicode_util:whitespace()). -spec trim(String, Dir) -> unicode:chardata() when String :: unicode:chardata(), Dir :: direction() | 'both'. trim(Str, Dir) -> trim(Str, Dir, unicode_util:whitespace()). -spec trim(String, Dir, Characters) -> unicode:chardata() when String :: unicode:chardata(), Dir :: direction() | 'both', Characters :: [grapheme_cluster()]. trim(Str, _, []) -> Str; trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 -> trim_ls(Str, Sep); trim(Str, leading, Sep) when is_list(Sep) -> trim_l(Str, Sep); trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 -> trim_ts(Str, Sep); trim(Str, trailing, Seps0) when is_list(Seps0) -> Seps = search_pattern(Seps0), trim_t(Str, 0, Seps); trim(Str, both, Sep) when is_list(Sep) -> trim(trim(Str,leading,Sep), trailing, Sep). %% Delete trailing newlines or \r\n -spec chomp(String::unicode:chardata()) -> unicode:chardata(). chomp(Str) -> trim(Str, trailing, [[$\r,$\n],$\n]). %% Split String into two parts where the leading part consists of Characters -spec take(String, Characters) -> {Leading, Trailing} when String::unicode:chardata(), Characters::[grapheme_cluster()], Leading::unicode:chardata(), Trailing::unicode:chardata(). take(Str, Sep) -> take(Str, Sep, false, leading). -spec take(String, Characters, Complement) -> {Leading, Trailing} when String::unicode:chardata(), Characters::[grapheme_cluster()], Complement::boolean(), Leading::unicode:chardata(), Trailing::unicode:chardata(). take(Str, Sep, Complement) -> take(Str, Sep, Complement, leading). -spec take(String, Characters, Complement, Dir) -> {Leading, Trailing} when String::unicode:chardata(), Characters::[grapheme_cluster()], Complement::boolean(), Dir::direction(), Leading::unicode:chardata(), Trailing::unicode:chardata(). take(Str, [], Complement, Dir) -> Empty = case is_binary(Str) of true -> <<>>; false -> [] end, case {Complement,Dir} of {false, leading} -> {Empty, Str}; {false, trailing} -> {Str, Empty}; {true, leading} -> {Str, Empty}; {true, trailing} -> {Empty, Str} end; take(Str, Sep, false, leading) -> take_l(Str, Sep, []); take(Str, Sep0, true, leading) -> Sep = search_pattern(Sep0), take_lc(Str, Sep, []); take(Str, Sep0, false, trailing) -> Sep = search_pattern(Sep0), take_t(Str, 0, Sep); take(Str, Sep0, true, trailing) -> Sep = search_pattern(Sep0), take_tc(Str, 0, Sep). %% Uppercase all chars in Str -spec uppercase(String::unicode:chardata()) -> unicode:chardata(). uppercase(CD) when is_list(CD) -> try uppercase_list(CD, false) catch unchanged -> CD end; uppercase(<>=Orig) -> try uppercase_bin(CP1, Rest, false) of List -> unicode:characters_to_binary(List) catch unchanged -> Orig end; uppercase(<<>>) -> <<>>; uppercase(Bin) -> error({badarg, Bin}). %% Lowercase all chars in Str -spec lowercase(String::unicode:chardata()) -> unicode:chardata(). lowercase(CD) when is_list(CD) -> try lowercase_list(CD, false) catch unchanged -> CD end; lowercase(<>=Orig) -> try lowercase_bin(CP1, Rest, false) of List -> unicode:characters_to_binary(List) catch unchanged -> Orig end; lowercase(<<>>) -> <<>>; lowercase(Bin) -> error({badarg, Bin}). %% Make a titlecase of the first char in Str -spec titlecase(String::unicode:chardata()) -> unicode:chardata(). titlecase(CD) when is_list(CD) -> case unicode_util:titlecase(CD) of [GC|Tail] -> append(GC,Tail); Empty -> Empty end; titlecase(CD) when is_binary(CD) -> case unicode_util:titlecase(CD) of [CP|Chars] when is_integer(CP) -> <>; [CPs|Chars] -> << << <> || CP <- CPs>>/binary, Chars/binary>>; [] -> <<>> end. %% Make a comparable string of the Str should be used for equality tests only -spec casefold(String::unicode:chardata()) -> unicode:chardata(). casefold(CD) when is_list(CD) -> try casefold_list(CD, false) catch unchanged -> CD end; casefold(<>=Orig) -> try casefold_bin(CP1, Rest, false) of List -> unicode:characters_to_binary(List) catch unchanged -> Orig end; casefold(<<>>) -> <<>>; casefold(Bin) -> error({badarg, Bin}). -spec to_integer(String) -> {Int, Rest} | {'error', Reason} when String :: unicode:chardata(), Int :: integer(), Rest :: unicode:chardata(), Reason :: 'no_integer' | badarg. to_integer(String) -> try take(String, "+-0123456789") of {Head, Tail} -> case is_empty(Head) of true -> {error, no_integer}; false -> List = unicode:characters_to_list(Head), case string:list_to_integer(List) of {error, _} = Err -> Err; {Int, Rest} -> to_number(String, Int, Rest, List, Tail) end end catch _:_ -> {error, badarg} end. -spec to_float(String) -> {Float, Rest} | {'error', Reason} when String :: unicode:chardata(), Float :: float(), Rest :: unicode:chardata(), Reason :: 'no_float' | 'badarg'. to_float(String) -> try take(String, "+-0123456789eE.,") of {Head, Tail} -> case is_empty(Head) of true -> {error, no_float}; false -> List = unicode:characters_to_list(Head), case string:list_to_float(List) of {error, _} = Err -> Err; {Float, Rest} -> to_number(String, Float, Rest, List, Tail) end end catch _:_ -> {error, badarg} end. to_number(String, Number, Rest, List, _Tail) when is_binary(String) -> BSz = erlang:length(List)-erlang:length(Rest), <<_:BSz/binary, Cont/binary>> = String, {Number, Cont}; to_number(_, Number, Rest, _, Tail) -> {Number, concat(Rest,Tail)}. %% Return the remaining string with prefix removed or else nomatch -spec prefix(String::unicode:chardata(), Prefix::unicode:chardata()) -> 'nomatch' | unicode:chardata(). prefix(Str, Prefix0) -> Result = case unicode:characters_to_list(Prefix0) of [] -> Str; Prefix -> prefix_1(Str, Prefix) end, case Result of [] when is_binary(Str) -> <<>>; Res -> Res end. %% split String with the first occurrence of SearchPattern, return list of splits -spec split(String, SearchPattern) -> [unicode:chardata()] when String :: unicode:chardata(), SearchPattern :: unicode:chardata(). split(String, SearchPattern) -> split(String, SearchPattern, leading). %% split String with SearchPattern, return list of splits -spec split(String, SearchPattern, Where) -> [unicode:chardata()] when String :: unicode:chardata(), SearchPattern :: unicode:chardata(), Where :: direction() | 'all'. split(String, SearchPattern, Where) -> case is_empty(SearchPattern) of true -> [String]; false -> SearchPatternCPs = unicode:characters_to_list(SearchPattern), case split_1(String, SearchPatternCPs, 0, Where, [], []) of {_Curr, []} -> [String]; {_Curr, Acc} when Where =:= trailing -> Acc; {Curr, Acc} when Where =:= all -> lists:reverse([Curr|Acc]); Acc when is_list(Acc) -> Acc end end. %% Replace the first SearchPattern in String with Replacement -spec replace(String, SearchPattern, Replacement) -> [unicode:chardata()] when String :: unicode:chardata(), SearchPattern :: unicode:chardata(), Replacement :: unicode:chardata(). replace(String, SearchPattern, Replacement) -> lists:join(Replacement, split(String, SearchPattern)). %% Replace Where SearchPattern in String with Replacement -spec replace(String, SearchPattern, Replacement, Where) -> [unicode:chardata()] when String :: unicode:chardata(), SearchPattern :: unicode:chardata(), Replacement :: unicode:chardata(), Where :: direction() | 'all'. replace(String, SearchPattern, Replacement, Where) -> lists:join(Replacement, split(String, SearchPattern, Where)). %% Split Str into a list of chardata separated by one of the grapheme %% clusters in Seps -spec lexemes(String::unicode:chardata(), SeparatorList::[grapheme_cluster()]) -> [unicode:chardata()]. lexemes([], _) -> []; lexemes(Str, []) -> [Str]; lexemes(Str, Seps0) when is_list(Seps0) -> Seps = search_pattern(Seps0), lexemes_m(Str, Seps, []). -spec nth_lexeme(String, N, SeparatorList) -> unicode:chardata() when String::unicode:chardata(), N::non_neg_integer(), SeparatorList::[grapheme_cluster()]. nth_lexeme(Str, 1, []) -> Str; nth_lexeme(Str, N, Seps0) when is_list(Seps0), is_integer(N), N > 0 -> Seps = search_pattern(Seps0), nth_lexeme_m(Str, Seps, N). %% find first SearchPattern in String return rest of string -spec find(String, SearchPattern) -> unicode:chardata() | 'nomatch' when String::unicode:chardata(), SearchPattern::unicode:chardata(). find(String, SearchPattern) -> find(String, SearchPattern, leading). %% find SearchPattern in String (search in Dir direction) return rest of string -spec find(String, SearchPattern, Dir) -> unicode:chardata() | 'nomatch' when String::unicode:chardata(), SearchPattern::unicode:chardata(), Dir::direction(). find(String, "", _) -> String; find(String, <<>>, _) -> String; find(String, SearchPattern, leading) -> find_l(String, unicode:characters_to_list(SearchPattern)); find(String, SearchPattern, trailing) -> find_r(String, unicode:characters_to_list(SearchPattern), nomatch). %% Fetch first grapheme cluster and return rest in tail -spec next_grapheme(String::unicode:chardata()) -> maybe_improper_list(grapheme_cluster(),unicode:chardata()) | {error,unicode:chardata()}. next_grapheme(CD) -> unicode_util:gc(CD). %% Fetch first codepoint and return rest in tail -spec next_codepoint(String::unicode:chardata()) -> maybe_improper_list(char(),unicode:chardata()) | {error,unicode:chardata()}. next_codepoint(CD) -> unicode_util:cp(CD). %% Internals length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) -> length_1(Cont, N+1); length_1(Str, N) -> case unicode_util:gc(Str) of [] -> N; [_|Rest] -> length_1(Rest, N+1); {error, Err} -> error({badarg, Err}) end. length_b(<>, CP1, N) when ?ASCII_LIST(CP1,CP2) -> length_b(Rest, CP2, N+1); length_b(Bin0, CP1, N) -> [_|Bin1] = unicode_util:gc([CP1|Bin0]), case unicode_util:cp(Bin1) of [] -> N+1; [CP3|Bin] -> length_b(Bin, CP3, N+1); {error, Err} -> error({badarg, Err}) end. equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) -> A =:= B andalso equal_1(AR, BR); equal_1([], BR) -> is_empty(BR); equal_1(A0,B0) -> case {unicode_util:cp(A0), unicode_util:cp(B0)} of {[CP|A],[CP|B]} -> equal_1(A,B); {[], []} -> true; {L1,L2} when is_list(L1), is_list(L2) -> false end. equal_nocase(A, A) -> true; equal_nocase(A0, B0) -> case {unicode_util:cp(unicode_util:casefold(A0)), unicode_util:cp(unicode_util:casefold(B0))} of {[CP|A],[CP|B]} -> equal_nocase(A,B); {[], []} -> true; {L1,L2} when is_list(L1), is_list(L2) -> false end. equal_norm(A, A, _Norm) -> true; equal_norm(A0, B0, Norm) -> case {unicode_util:cp(unicode_util:Norm(A0)), unicode_util:cp(unicode_util:Norm(B0))} of {[CP|A],[CP|B]} -> equal_norm(A,B, Norm); {[], []} -> true; {L1,L2} when is_list(L1), is_list(L2) -> false end. equal_norm_nocase(A, A, _Norm) -> true; equal_norm_nocase(A0, B0, Norm) -> case {unicode_util:cp(unicode_util:casefold(unicode_util:Norm(A0))), unicode_util:cp(unicode_util:casefold(unicode_util:Norm(B0)))} of {[CP|A],[CP|B]} -> equal_norm_nocase(A,B, Norm); {[], []} -> true; {L1,L2} when is_list(L1), is_list(L2) -> false end. reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) -> reverse_1(Cont, [CP1|Acc]); reverse_1(CD, Acc) -> case unicode_util:gc(CD) of [GC|Rest] -> reverse_1(Rest, [GC|Acc]); [] -> Acc; {error, Err} -> error({badarg, Err}) end. reverse_b(<>, CP1, Acc) when ?ASCII_LIST(CP1,CP2) -> reverse_b(Rest, CP2, [CP1|Acc]); reverse_b(Bin0, CP1, Acc) -> [GC|Bin1] = unicode_util:gc([CP1|Bin0]), case unicode_util:cp(Bin1) of [] -> [GC|Acc]; [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc]); {error, Err} -> error({badarg, Err}) end. slice_l0(<>, N) when N > 0 -> slice_lb(Bin, CP1, N); slice_l0(L, N) -> slice_l(L, N). slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 -> slice_l(Cont, N-1); slice_l(CD, N) when N > 0 -> case unicode_util:gc(CD) of [_|Cont] -> slice_l(Cont, N-1); [] -> []; {error, Err} -> error({badarg, Err}) end; slice_l(Cont, 0) -> Cont. slice_lb(<>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 -> slice_lb(Bin, CP2, N-1); slice_lb(Bin, CP1, N) -> [_|Rest] = unicode_util:gc([CP1|Bin]), if N > 1 -> case unicode_util:cp(Rest) of [CP2|Cont] -> slice_lb(Cont, CP2, N-1); [] -> <<>>; {error, Err} -> error({badarg, Err}) end; N =:= 1 -> Rest end. slice_trail(Orig, N) when is_binary(Orig) -> case Orig of <> when N > 0 -> Length = slice_bin(Bin, CP1, N), Sz = byte_size(Orig) - Length, <> = Orig, Keep; <<_, _/binary>> when N > 0 -> error({badarg, Orig}); _ -> <<>> end; slice_trail(CD, N) when is_list(CD) -> slice_list(CD, N). slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 -> [CP1|slice_list(Cont, N-1)]; slice_list(CD, N) when N > 0 -> case unicode_util:gc(CD) of [GC|Cont] -> append(GC, slice_list(Cont, N-1)); [] -> []; {error, Err} -> error({badarg, Err}) end; slice_list(_, 0) -> []. slice_bin(<>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 -> slice_bin(Bin, CP2, N-1); slice_bin(CD, CP1, N) when N > 0 -> [_|Bin] = unicode_util:gc([CP1|CD]), case unicode_util:cp(Bin) of [CP2|Cont] -> slice_bin(Cont, CP2, N-1); [] -> 0; {error, Err} -> error({badarg, Err}) end; slice_bin(CD, CP1, 0) -> byte_size(CD)+byte_size(<>). uppercase_list([CP1|[CP2|_]=Cont], _Changed) when $a =< CP1, CP1 =< $z, CP2 < 256 -> [CP1-32|uppercase_list(Cont, true)]; uppercase_list([CP1|[CP2|_]=Cont], Changed) when CP1 < 128, CP2 < 256 -> [CP1|uppercase_list(Cont, Changed)]; uppercase_list([], true) -> []; uppercase_list([], false) -> throw(unchanged); uppercase_list(CPs0, Changed) -> case unicode_util:uppercase(CPs0) of [Char|CPs] when Char =:= hd(CPs0) -> [Char|uppercase_list(CPs, Changed)]; [Char|CPs] -> append(Char,uppercase_list(CPs, true)); [] -> uppercase_list([], Changed) end. uppercase_bin(CP1, <>, _Changed) when $a =< CP1, CP1 =< $z, CP2 < 256 -> [CP1-32|uppercase_bin(CP2, Bin, true)]; uppercase_bin(CP1, <>, Changed) when CP1 < 128, CP2 < 256 -> [CP1|uppercase_bin(CP2, Bin, Changed)]; uppercase_bin(CP1, Bin, Changed) -> case unicode_util:uppercase([CP1|Bin]) of [CP1|CPs] -> case unicode_util:cp(CPs) of [Next|Rest] -> [CP1|uppercase_bin(Next, Rest, Changed)]; [] when Changed -> [CP1]; [] -> throw(unchanged); {error, Err} -> error({badarg, Err}) end; [Char|CPs] -> case unicode_util:cp(CPs) of [Next|Rest] -> [Char|uppercase_bin(Next, Rest, true)]; [] -> [Char]; {error, Err} -> error({badarg, Err}) end end. lowercase_list([CP1|[CP2|_]=Cont], _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 -> [CP1+32|lowercase_list(Cont, true)]; lowercase_list([CP1|[CP2|_]=Cont], Changed) when CP1 < 128, CP2 < 256 -> [CP1|lowercase_list(Cont, Changed)]; lowercase_list([], true) -> []; lowercase_list([], false) -> throw(unchanged); lowercase_list(CPs0, Changed) -> case unicode_util:lowercase(CPs0) of [Char|CPs] when Char =:= hd(CPs0) -> [Char|lowercase_list(CPs, Changed)]; [Char|CPs] -> append(Char,lowercase_list(CPs, true)); [] -> lowercase_list([], Changed) end. lowercase_bin(CP1, <>, _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 -> [CP1+32|lowercase_bin(CP2, Bin, true)]; lowercase_bin(CP1, <>, Changed) when CP1 < 128, CP2 < 256 -> [CP1|lowercase_bin(CP2, Bin, Changed)]; lowercase_bin(CP1, Bin, Changed) -> case unicode_util:lowercase([CP1|Bin]) of [CP1|CPs] -> case unicode_util:cp(CPs) of [Next|Rest] -> [CP1|lowercase_bin(Next, Rest, Changed)]; [] when Changed -> [CP1]; [] -> throw(unchanged); {error, Err} -> error({badarg, Err}) end; [Char|CPs] -> case unicode_util:cp(CPs) of [Next|Rest] -> [Char|lowercase_bin(Next, Rest, true)]; [] -> [Char]; {error, Err} -> error({badarg, Err}) end end. casefold_list([CP1|[CP2|_]=Cont], _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 -> [CP1+32|casefold_list(Cont, true)]; casefold_list([CP1|[CP2|_]=Cont], Changed) when CP1 < 128, CP2 < 256 -> [CP1|casefold_list(Cont, Changed)]; casefold_list([], true) -> []; casefold_list([], false) -> throw(unchanged); casefold_list(CPs0, Changed) -> case unicode_util:casefold(CPs0) of [Char|CPs] when Char =:= hd(CPs0) -> [Char|casefold_list(CPs, Changed)]; [Char|CPs] -> append(Char,casefold_list(CPs, true)); [] -> casefold_list([], Changed) end. casefold_bin(CP1, <>, _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 -> [CP1+32|casefold_bin(CP2, Bin, true)]; casefold_bin(CP1, <>, Changed) when CP1 < 128, CP2 < 256 -> [CP1|casefold_bin(CP2, Bin, Changed)]; casefold_bin(CP1, Bin, Changed) -> case unicode_util:casefold([CP1|Bin]) of [CP1|CPs] -> case unicode_util:cp(CPs) of [Next|Rest] -> [CP1|casefold_bin(Next, Rest, Changed)]; [] when Changed -> [CP1]; [] -> throw(unchanged); {error, Err} -> error({badarg, Err}) end; [Char|CPs] -> case unicode_util:cp(CPs) of [Next|Rest] -> [Char|casefold_bin(Next, Rest, true)]; [] -> [Char]; {error, Err} -> error({badarg, Err}) end end. %% Fast path for ascii searching for one character in lists trim_ls([CP1|[CP2|_]=Cont]=Str, Sep) when ?ASCII_LIST(CP1,CP2) -> case Sep of CP1 -> trim_ls(Cont, Sep); _ -> Str end; trim_ls(Str, Sep) -> trim_l(Str, [Sep]). trim_l([CP1|[CP2|_]=Cont]=Str, Sep) when ?ASCII_LIST(CP1,CP2) -> case lists:member(CP1, Sep) of true -> trim_l(Cont, Sep); false -> Str end; trim_l([Bin|Cont0], Sep) when is_binary(Bin) -> case bin_search_inv(Bin, Cont0, Sep) of {nomatch, Cont} -> trim_l(Cont, Sep); Keep -> Keep end; trim_l(Str, Sep) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> case lists:member(C, Sep) of true -> trim_l(Cs, Sep); false -> Str end; [] -> [] end; trim_l(Bin, Sep) when is_binary(Bin) -> case bin_search_inv(Bin, [], Sep) of {nomatch,_} -> <<>>; [Keep] -> Keep end. %% Fast path for ascii searching for one character in lists trim_ts([Sep|Cs1]=Str, Sep) -> case Cs1 of [] -> []; [CP2|_] when ?ASCII_LIST(Sep,CP2) -> Tail = trim_ts(Cs1, Sep), case is_empty(Tail) of true -> []; false -> [Sep|Tail] end; _ -> trim_t(Str, 0, search_pattern([Sep])) end; trim_ts([CP|Cont],Sep) when is_integer(CP) -> [CP|trim_ts(Cont, Sep)]; trim_ts(Str, Sep) -> trim_t(Str, 0, search_pattern([Sep])). trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) -> case lists:member(CP1, CPs) of true -> [GC|Cs1] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of true -> Tail = trim_t(Cs1, 0, Seps), case is_empty(Tail) of true -> []; false -> append(GC,Tail) end; false -> append(GC,trim_t(Cs1, 0, Seps)) end; false -> [CP1|trim_t(Cont, 0, Seps)] end; trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, Seps = search_compile(Seps0), case bin_search(Rest, Cont0, Seps) of {nomatch,_} -> stack(Bin, trim_t(Cont0, 0, Seps)); [SepStart|Cont1] -> case bin_search_inv(SepStart, Cont1, GCs) of {nomatch, Cont} -> Tail = trim_t(Cont, 0, Seps), case is_empty(Tail) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), <> = Bin, Keep; false -> Used = cp_prefix(Cont0, Cont), stack(Bin, stack(Used, Tail)) end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), trim_t([Bin|Cont], KeepSz, Seps) end end; trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> case unicode_util:gc(Str) of [GC|Cs1] -> case lists:member(GC, GCs) of true -> Tail = trim_t(Cs1, 0, Seps), case is_empty(Tail) of true -> []; false -> append(GC,Tail) end; false -> append(GC,trim_t(Cs1, 0, Seps)) end; [] -> [] end; trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, Seps = search_compile(Seps0), case bin_search(Rest, [], Seps) of {nomatch,_} -> Bin; [SepStart] -> case bin_search_inv(SepStart, [], GCs) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <> = Bin, Keep; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), trim_t(Bin, KeepSz, Seps) end end. take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc) when ?ASCII_LIST(CP1,CP2) -> case lists:member(CP1, Seps) of true -> take_l(Cont, Seps, [CP1|Acc]); false -> {rev(Acc), Str} end; take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) -> case bin_search_inv(Bin, Cont0, Seps) of {nomatch, Cont} -> Used = cp_prefix(Cont0, Cont), take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]); [Bin1|_]=After when is_binary(Bin1) -> First = byte_size(Bin) - byte_size(Bin1), <> = Bin, {btoken(Keep,Acc), After} end; take_l(Str, Seps, Acc) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> case lists:member(C, Seps) of true -> take_l(Cs, Seps, append(rev(C),Acc)); false -> {rev(Acc), Str} end; [] -> {rev(Acc), []} end; take_l(Bin, Seps, Acc) when is_binary(Bin) -> case bin_search_inv(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin, Acc), <<>>}; [After] -> First = byte_size(Bin) - byte_size(After), <> = Bin, {btoken(Keep, Acc), After} end. take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) -> case lists:member(CP1, CPs) of true -> [GC|Str] = unicode_util:gc(Str0), case lists:member(GC, GCs) of false -> take_lc(Str, Seps, append(rev(GC),Acc)); true -> {rev(Acc), Str0} end; false -> take_lc(Cont, Seps, append(CP1,Acc)) end; take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) -> Seps = search_compile(Seps0), case bin_search(Bin, Cont0, Seps) of {nomatch, Cont} -> Used = cp_prefix(Cont0, Cont), take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]); [Bin1|_]=After when is_binary(Bin1) -> First = byte_size(Bin) - byte_size(Bin1), <> = Bin, {btoken(Keep,Acc), After} end; take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) -> case unicode_util:gc(Str) of [C|Cs] -> case lists:member(C, GCs) of false -> take_lc(Cs, Seps, append(rev(C),Acc)); true -> {rev(Acc), Str} end; [] -> {rev(Acc), []} end; take_lc(Bin, Seps0, Acc) when is_binary(Bin) -> Seps = search_compile(Seps0), case bin_search(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin, Acc), <<>>}; [After] -> First = byte_size(Bin) - byte_size(After), <> = Bin, {btoken(Keep, Acc), After} end. take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) -> case lists:member(CP1, CPs) of true -> [GC|Str] = unicode_util:gc(Str0), case lists:member(GC, GCs) of true -> {Head, Tail} = take_t(Str, 0, Seps), case is_empty(Head) of true -> {Head, append(GC,Tail)}; false -> {append(GC,Head), Tail} end; false -> {Head, Tail} = take_t(Str, 0, Seps), {append(GC,Head), Tail} end; false -> {Head, Tail} = take_t(Cont, 0, Seps), {[CP1|Head], Tail} end; take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, Seps = search_compile(Seps0), case bin_search(Rest, Cont0, Seps) of {nomatch,Cont} -> Used = cp_prefix(Cont0, Cont), {Head, Tail} = take_t(Cont, 0, Seps), {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail}; [SepStart|Cont1] -> case bin_search_inv(SepStart, Cont1, GCs) of {nomatch, Cont} -> {Head, Tail} = take_t(Cont, 0, Seps), Used = cp_prefix(Cont0, Cont), case is_empty(Head) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), <> = Bin, {Keep, stack(stack(End,Used),Tail)}; false -> {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail} end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), take_t([Bin|Cont], KeepSz, Seps) end end; take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> case unicode_util:gc(Str) of [GC|Cs1] -> case lists:member(GC, GCs) of true -> {Head, Tail} = take_t(Cs1, 0, Seps), case is_empty(Head) of true -> {Head, append(GC,Tail)}; false -> {append(GC,Head), Tail} end; false -> {Head, Tail} = take_t(Cs1, 0, Seps), {append(GC,Head), Tail} end; [] -> {[],[]} end; take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, Seps = search_compile(Seps0), case bin_search(Rest, [], Seps) of {nomatch,_} -> {Bin, <<>>}; [SepStart] -> case bin_search_inv(SepStart, [], GCs) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <> = Bin, {Before, End}; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), take_t(Bin, KeepSz, Seps) end end. take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) -> case lists:member(CP1, GCs) of false -> {Head, Tail} = take_tc(Cont, 0, Seps), case is_empty(Head) of true -> {Head, append(CP1,Tail)}; false -> {append(CP1,Head), Tail} end; true -> {Head, Tail} = take_tc(Cont, 0, Seps), {append(CP1,Head), Tail} end; take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, case bin_search_inv(Rest, Cont0, GCs) of {nomatch,Cont} -> Used = cp_prefix(Cont0, Cont), {Head, Tail} = take_tc(Cont, 0, Seps0), {stack(unicode:characters_to_binary([Bin|Used]), Head), Tail}; [SepStart|Cont1] -> Seps = search_compile(Seps0), case bin_search(SepStart, Cont1, Seps) of {nomatch, Cont} -> {Head, Tail} = take_tc(Cont, 0, Seps), Used = cp_prefix(Cont0, Cont), case is_empty(Head) of true -> KeepSz = byte_size(Bin) - byte_size(SepStart), <> = Bin, {Keep, stack(stack(End,Used),Tail)}; false -> {stack(unicode:characters_to_binary([Bin|Used]),Head), Tail} end; [NonSep|Cont] when is_binary(NonSep) -> KeepSz = byte_size(Bin) - byte_size(NonSep), take_tc([Bin|Cont], KeepSz, Seps) end end; take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) -> case unicode_util:gc(Str) of [GC|Cs1] -> case lists:member(GC, GCs) of false -> {Head, Tail} = take_tc(Cs1, 0, Seps), case is_empty(Head) of true -> {Head, append(GC,Tail)}; false -> {append(GC,Head), Tail} end; true -> {Head, Tail} = take_tc(Cs1, 0, Seps), {append(GC,Head), Tail} end; [] -> {[],[]} end; take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) -> <<_:N/binary, Rest/binary>> = Bin, case bin_search_inv(Rest, [], GCs) of {nomatch,_} -> {Bin, <<>>}; [SepStart] -> Seps = search_compile(Seps0), case bin_search(SepStart, [], Seps) of {nomatch,_} -> KeepSz = byte_size(Bin) - byte_size(SepStart), <> = Bin, {Before, End}; [NonSep] -> KeepSz = byte_size(Bin) - byte_size(NonSep), take_tc(Bin, KeepSz, Seps) end end. prefix_1(Cs0, [GC]) -> case unicode_util:gc(Cs0) of [GC|Cs] -> Cs; _ -> nomatch end; prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) -> case CP =:= Pre of true -> prefix_1(Cs,PreR); false -> nomatch end; prefix_1(<>, [Pre|PreR]) -> case CP =:= Pre of true -> prefix_1(Cs,PreR); false -> nomatch end; prefix_1(Cs0, [Pre|PreR]) -> case unicode_util:cp(Cs0) of [Pre|Cs] -> prefix_1(Cs,PreR); _ -> nomatch end. split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) -> case CP1=:=C of true -> case prefix_1(Cs0, Needle) of nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc); Rest when Where =:= leading -> [rev(Curr), Rest]; Rest when Where =:= trailing -> split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]); Rest when Where =:= all -> split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc]) end; false -> split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc) end; split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc) when is_binary(Bin) -> case bin_search_str(Bin, Start, Cont0, Needle) of {nomatch,Sz,Cont} -> <> = Bin, split_1(Cont, Needle, 0, Where, [Keep|Curr0], Acc); {Before, [Cs0|Cont], After} -> Curr = add_non_empty(Before,Curr0), case Where of leading -> [rev(Curr),After]; trailing -> <<_/utf8, Cs/binary>> = Cs0, Next = byte_size(Bin) - byte_size(Cs), split_1([Bin|Cont], Needle, Next, Where, Curr0, [rev(Curr),After]); all -> split_1(After, Needle, 0, Where, [], [rev(Curr)|Acc]) end end; split_1(Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_list(Cs0) -> case unicode_util:cp(Cs0) of [C|Cs] -> case prefix_1(Cs0, Needle) of nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc); Rest when Where =:= leading -> [rev(Curr), Rest]; Rest when Where =:= trailing -> split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]); Rest when Where =:= all -> split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc]) end; [Other|Cs] -> split_1(Cs, Needle, 0, Where, append(Other,Curr), Acc); [] -> {rev(Curr), Acc} end; split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) -> case bin_search_str(Bin, Start, [], Needle) of {nomatch,_,_} -> <<_:Start/binary, Keep/binary>> = Bin, {rev([Keep|Curr0]), Acc}; {Before, [Cs0], After} -> case Where of leading -> [rev([Before|Curr0]),After]; trailing -> <<_/utf8, Cs/binary>> = Cs0, Next = byte_size(Bin) - byte_size(Cs), split_1(Bin, Needle, Next, Where, Curr0, [btoken(Before,Curr0),After]); all -> Next = byte_size(Bin) - byte_size(After), <<_:Start/binary, Keep/binary>> = Before, Curr = [Keep|Curr0], split_1(Bin, Needle, Next, Where, [], [rev(Curr)|Acc]) end end. lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps0, Ts) when is_integer(CP) -> case lists:member(CP, CPs) of true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of true -> lexemes_m(Cs2, Seps0, Ts); false -> Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; false -> Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) -> case bin_search_inv(Bin, Cont0, GCs) of {nomatch,Cont} -> lexemes_m(Cont, Seps0, Ts); Cs -> Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) -> case unicode_util:gc(Cs0) of [C|Cs] -> case lists:member(C, GCs) of true -> lexemes_m(Cs, Seps0, Ts); false -> Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []), lexemes_m(Rest, Seps, [Lexeme|Ts]) end; [] -> lists:reverse(Ts) end; lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) -> case bin_search_inv(Bin, [], GCs) of {nomatch,_} -> lists:reverse(Ts); [Cs] -> Seps = search_compile(Seps0), {Lexeme,Rest} = lexeme_pick(Cs, Seps, []), lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts)) end. lexeme_pick([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps, Tkn) when is_integer(CP) -> case lists:member(CP, CPs) of true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of true -> {rev(Tkn), Cs2}; false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn)) end; false -> lexeme_pick(Cs1, Seps, [CP|Tkn]) end; lexeme_pick([Bin|Cont0], Seps, Tkn) when is_binary(Bin) -> case bin_search(Bin, Cont0, Seps) of {nomatch,_} -> lexeme_pick(Cont0, Seps, [Bin|Tkn]); [Left|_Cont] = Cs -> Bytes = byte_size(Bin) - byte_size(Left), <> = Bin, {btoken(Lexeme, Tkn), Cs} end; lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) -> case unicode_util:cp(Cs0) of [CP|Cs] -> case lists:member(CP, CPs) of true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of true -> {rev(Tkn), Cs2}; false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn)) end; false -> lexeme_pick(Cs, Seps, append(CP,Tkn)) end; [] -> {rev(Tkn), []} end; lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) -> case bin_search(Bin, [], Seps) of {nomatch,_} -> {btoken(Bin,Tkn), []}; [Left] -> Bytes = byte_size(Bin) - byte_size(Left), <> = Bin, {btoken(Lexeme, Tkn), Left} end. nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) -> case bin_search_inv(Bin, Cont0, GCs) of {nomatch,Cont} -> nth_lexeme_m(Cont, Seps0, N); Cs when N > 1 -> Rest = lexeme_skip(Cs, Seps0), nth_lexeme_m(Rest, Seps0, N-1); Cs -> Seps = search_compile(Seps0), {Lexeme,_} = lexeme_pick(Cs, Seps, []), Lexeme end; nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) -> case unicode_util:gc(Cs0) of [C|Cs] -> case lists:member(C, GCs) of true -> nth_lexeme_m(Cs, Seps0, N); false when N > 1 -> Cs1 = lexeme_skip(Cs, Seps0), nth_lexeme_m(Cs1, Seps0, N-1); false -> Seps = search_compile(Seps0), {Lexeme,_} = lexeme_pick(Cs0, Seps, []), Lexeme end; [] -> [] end; nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) -> Seps = search_compile(Seps0), case bin_search_inv(Bin, [], GCs) of [Cs] when N > 1 -> Cs1 = lexeme_skip(Cs, Seps), nth_lexeme_m(Cs1, Seps, N-1); [Cs] -> {Lexeme,_} = lexeme_pick(Cs, Seps, []), Lexeme; {nomatch,_} -> <<>> end. lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) -> case lists:member(CP, CPs) of true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of true -> Cs2; false -> lexeme_skip(Cs2, Seps) end; false -> lexeme_skip(Cs1, Seps) end; lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) -> Seps = search_compile(Seps0), case bin_search(Bin, Cont0, Seps) of {nomatch,_} -> lexeme_skip(Cont0, Seps); Cs -> tl(unicode_util:gc(Cs)) end; lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) -> case unicode_util:cp(Cs0) of [CP|Cs] -> case lists:member(CP, CPs) of true -> [GC|Cs2] = unicode_util:gc(Cs0), case lists:member(GC, GCs) of true -> Cs2; false -> lexeme_skip(Cs2, Seps) end; false -> lexeme_skip(Cs, Seps) end; [] -> [] end; lexeme_skip(Bin, Seps0) when is_binary(Bin) -> Seps = search_compile(Seps0), case bin_search(Bin, [], Seps) of {nomatch,_} -> <<>>; [Left] -> tl(unicode_util:gc(Left)) end. find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) -> case C1 of C -> case prefix_1(Cs0, Needle) of nomatch -> find_l(Cs, Needle); _ -> Cs0 end; _ -> find_l(Cs, Needle) end; find_l([Bin|Cont0], Needle) when is_binary(Bin) -> case bin_search_str(Bin, 0, Cont0, Needle) of {nomatch, _, Cont} -> find_l(Cont, Needle); {_Before, Cs, _After} -> Cs end; find_l(Cs0, [C|_]=Needle) when is_list(Cs0) -> case unicode_util:cp(Cs0) of [C|Cs] -> case prefix_1(Cs0, Needle) of nomatch -> find_l(Cs, Needle); _ -> Cs0 end; [_C|Cs] -> find_l(Cs, Needle); [] -> nomatch end; find_l(Bin, Needle) -> case bin_search_str(Bin, 0, [], Needle) of {nomatch,_,_} -> nomatch; {_Before, [Cs], _After} -> Cs end. find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) -> case Cp of C -> case prefix_1(Cs0, Needle) of nomatch -> find_r(Cs, Needle, Res); _ -> find_r(Cs, Needle, Cs0) end; _ -> find_r(Cs, Needle, Res) end; find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) -> case bin_search_str(Bin, 0, Cont0, Needle) of {nomatch,_,Cont} -> find_r(Cont, Needle, Res); {_, Cs0, _} -> [_|Cs] = unicode_util:gc(Cs0), find_r(Cs, Needle, Cs0) end; find_r(Cs0, [C|_]=Needle, Res) when is_list(Cs0) -> case unicode_util:cp(Cs0) of [C|Cs] -> case prefix_1(Cs0, Needle) of nomatch -> find_r(Cs, Needle, Res); _ -> find_r(Cs, Needle, Cs0) end; [_C|Cs] -> find_r(Cs, Needle, Res); [] -> Res end; find_r(Bin, Needle, Res) -> case bin_search_str(Bin, 0, [], Needle) of {nomatch,_,_} -> Res; {_Before, [Cs0], _After} -> <<_/utf8, Cs/binary>> = Cs0, find_r(Cs, Needle, Cs0) end. %% These are used to avoid creating lists around binaries %% might be unnecessary, is there a better solution? btoken(Token, []) -> Token; btoken(BinPart, [C]) when is_integer(C) -> <>; btoken(<<>>, Tkn) -> lists:reverse(Tkn); btoken(BinPart, Cs) -> [lists:reverse(Cs),BinPart]. rev([B]) when is_binary(B) -> B; rev(L) when is_list(L) -> lists:reverse(L); rev(C) when is_integer(C) -> C. append(Char, <<>>) when is_integer(Char) -> [Char]; append(Char, <<>>) when is_list(Char) -> Char; append(Char, Bin) when is_binary(Bin) -> [Char,Bin]; append(Char, Str) when is_integer(Char) -> [Char|Str]; append(GC, Str) when is_list(GC) -> GC ++ Str. stack(Bin, []) -> Bin; stack(<<>>, St) -> St; stack([], St) -> St; stack(Bin, St) -> [Bin|St]. add_non_empty(<<>>, L) -> L; add_non_empty(Token, L) -> [Token|L]. cp_prefix(Orig, Cont) -> case unicode_util:cp(Cont) of [] -> Orig; [Cp|Rest] -> cp_prefix_1(Orig, Cp, Rest) end. cp_prefix_1(Orig, Until, Cont) -> case unicode_util:cp(Orig) of [Until|Rest] -> case equal(Rest, Cont) of true -> []; false-> [Until|cp_prefix_1(Rest, Until, Cont)] end; [CP|Rest] -> [CP|cp_prefix_1(Rest, Until, Cont)] end. %% Binary special bin_search(Bin, Cont, {Seps,_,BP}) -> bin_search_loop(Bin, 0, BP, Cont, Seps). %% Need to work with [<<$a>>, <<778/utf8>>], %% i.e. å in nfd form $a "COMBINING RING ABOVE" %% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>> %% combined with other characters are currently ignored. search_pattern({_,_,_}=P) -> P; search_pattern(Seps) -> CPs = search_cp(Seps), {Seps, CPs, undefined}. search_compile({Sep, CPs, undefined}) -> {Sep, CPs, binary:compile_pattern(bin_pattern(CPs))}; search_compile({_,_,_}=Compiled) -> Compiled. search_cp([CP|Seps]) when is_integer(CP) -> [CP|search_cp(Seps)]; search_cp([Pattern|Seps]) -> [CP|_] = unicode_util:cp(Pattern), [CP|search_cp(Seps)]; search_cp([]) -> []. bin_pattern([CP|Seps]) -> [<>|bin_pattern(Seps)]; bin_pattern([]) -> []. bin_search_loop(Bin0, Start, _, Cont, _Seps) when byte_size(Bin0) =< Start; Start < 0 -> {nomatch, Cont}; bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) -> <<_:Start/binary, Bin/binary>> = Bin0, case binary:match(Bin, BinSeps) of nomatch -> {nomatch,Cont}; {Where, _CL} when Cont =:= [] -> <<_:Where/binary, Cont1/binary>> = Bin, [GC|Cont2] = unicode_util:gc(Cont1), case lists:member(GC, Seps) of false when Cont2 =:= [] -> {nomatch, []}; false -> Next = byte_size(Bin0) - byte_size(Cont2), bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); true -> [Cont1] end; {Where, _CL} -> <<_:Where/binary, Cont0/binary>> = Bin, Cont1 = [Cont0|Cont], [GC|Cont2] = unicode_util:gc(Cont1), case lists:member(GC, Seps) of false -> case Cont2 of [BinR|Cont] when is_binary(BinR) -> Next = byte_size(Bin0) - byte_size(BinR), bin_search_loop(Bin0, Next, BinSeps, Cont, Seps); _ -> {nomatch, Cont2} end; true -> Cont1 end end. bin_search_inv(<<>>, Cont, _) -> {nomatch, Cont}; bin_search_inv(Bin, Cont, [Sep]) -> bin_search_inv_1(Bin, Cont, Sep); bin_search_inv(Bin, Cont, Seps) -> bin_search_inv_n(Bin, Cont, Seps). bin_search_inv_1(<>=Bin0, Cont, Sep) -> case BinRest of <> when ?ASCII_LIST(CP1, CP2) -> case CP1 of Sep -> bin_search_inv_1(BinRest, Cont, Sep); _ -> [Bin0|Cont] end; _ when Cont =:= [] -> case unicode_util:gc(Bin0) of [Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep); _ -> [Bin0|Cont] end; _ -> case unicode_util:gc([Bin0|Cont]) of [Sep|[Bin|Cont]] when is_binary(Bin) -> bin_search_inv_1(Bin, Cont, Sep); [Sep|Cs] -> {nomatch, Cs}; _ -> [Bin0|Cont] end end; bin_search_inv_1(<<>>, Cont, _Sep) -> {nomatch, Cont}; bin_search_inv_1([], Cont, _Sep) -> {nomatch, Cont}; bin_search_inv_1(Bin, _, _) -> error({badarg, Bin}). bin_search_inv_n(<>=Bin0, Cont, Seps) -> case BinRest of <> when ?ASCII_LIST(CP1, CP2) -> case lists:member(CP1,Seps) of true -> bin_search_inv_n(BinRest, Cont, Seps); false -> [Bin0|Cont] end; _ when Cont =:= [] -> [GC|Bin] = unicode_util:gc(Bin0), case lists:member(GC, Seps) of true -> bin_search_inv_n(Bin, Cont, Seps); false -> [Bin0|Cont] end; _ -> [GC|Cs0] = unicode_util:gc([Bin0|Cont]), case lists:member(GC, Seps) of false -> [Bin0|Cont]; true -> case Cs0 of [Bin|Cont] when is_binary(Bin) -> bin_search_inv_n(Bin, Cont, Seps); _ -> {nomatch, Cs0} end end end; bin_search_inv_n(<<>>, Cont, _Sep) -> {nomatch, Cont}; bin_search_inv_n([], Cont, _Sep) -> {nomatch, Cont}; bin_search_inv_n(Bin, _, _) -> error({badarg, Bin}). bin_search_str(Bin0, Start, [], SearchCPs) -> Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)), bin_search_str_1(Bin0, Start, Compiled, SearchCPs); bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) -> First = binary:compile_pattern(<>), bin_search_str_2(Bin0, Start, Cont, First, SearchCPs). bin_search_str_1(Bin0, Start, First, SearchCPs) -> <<_:Start/binary, Bin/binary>> = Bin0, case binary:match(Bin, First) of nomatch -> {nomatch, byte_size(Bin0), []}; {Where0, _} -> Where = Start+Where0, <> = Bin0, case prefix_1(Cs0, SearchCPs) of nomatch -> <<_/utf8, Cs/binary>> = Cs0, KeepSz = byte_size(Bin0) - byte_size(Cs), bin_search_str_1(Bin0, KeepSz, First, SearchCPs); [] -> {Keep, [Cs0], <<>>}; Rest -> {Keep, [Cs0], Rest} end end. bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) -> <<_:Start/binary, Bin/binary>> = Bin0, case binary:match(Bin, First) of nomatch -> {nomatch, byte_size(Bin0), Cont}; {Where0, _} -> Where = Start+Where0, <> = Bin0, [GC|Cs]=unicode_util:gc(Cs0), case prefix_1(stack(Cs0,Cont), SearchCPs) of nomatch when is_binary(Cs) -> KeepSz = byte_size(Bin0) - byte_size(Cs), bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs); nomatch -> {nomatch, Where, stack([GC|Cs],Cont)}; [] -> {Keep, [Cs0|Cont], <<>>}; Rest -> {Keep, [Cs0|Cont], Rest} end end. %%--------------------------------------------------------------------------- %% OLD lists API kept for backwards compability %%--------------------------------------------------------------------------- %% Robert's bit %% len(String) %% Return the length of a string. -spec len(String) -> Length when String :: string(), Length :: non_neg_integer(). len(S) -> erlang:length(S). %% equal(String1, String2) %% Test if 2 strings are equal. %% -spec equal(String1, String2) -> boolean() when %% String1 :: string(), %% String2 :: string(). %% equal(S, S) -> true; %% equal(_, _) -> false. %% concat(String1, String2) %% Concatenate 2 strings. -spec concat(String1, String2) -> String3 when String1 :: string(), String2 :: string(), String3 :: string(). concat(S1, S2) -> S1 ++ S2. %% chr(String, Char) %% rchr(String, Char) %% Return the first/last index of the character in a string. -spec chr(String, Character) -> Index when String :: string(), Character :: char(), Index :: non_neg_integer(). chr(S, C) when is_integer(C) -> chr(S, C, 1). chr([C|_Cs], C, I) -> I; chr([_|Cs], C, I) -> chr(Cs, C, I+1); chr([], _C, _I) -> 0. -spec rchr(String, Character) -> Index when String :: string(), Character :: char(), Index :: non_neg_integer(). rchr(S, C) when is_integer(C) -> rchr(S, C, 1, 0). rchr([C|Cs], C, I, _L) -> %Found one, now find next! rchr(Cs, C, I+1, I); rchr([_|Cs], C, I, L) -> rchr(Cs, C, I+1, L); rchr([], _C, _I, L) -> L. %% str(String, SubString) %% rstr(String, SubString) %% index(String, SubString) %% Return the first/last index of the sub-string in a string. %% index/2 is kept for backwards compatibility. -spec str(String, SubString) -> Index when String :: string(), SubString :: string(), Index :: non_neg_integer(). str(S, Sub) when is_list(Sub) -> str(S, Sub, 1). str([C|S], [C|Sub], I) -> case l_prefix(Sub, S) of true -> I; false -> str(S, [C|Sub], I+1) end; str([_|S], Sub, I) -> str(S, Sub, I+1); str([], _Sub, _I) -> 0. -spec rstr(String, SubString) -> Index when String :: string(), SubString :: string(), Index :: non_neg_integer(). rstr(S, Sub) when is_list(Sub) -> rstr(S, Sub, 1, 0). rstr([C|S], [C|Sub], I, L) -> case l_prefix(Sub, S) of true -> rstr(S, [C|Sub], I+1, I); false -> rstr(S, [C|Sub], I+1, L) end; rstr([_|S], Sub, I, L) -> rstr(S, Sub, I+1, L); rstr([], _Sub, _I, L) -> L. l_prefix([C|Pre], [C|String]) -> l_prefix(Pre, String); l_prefix([], String) when is_list(String) -> true; l_prefix(Pre, String) when is_list(Pre), is_list(String) -> false. %% span(String, Chars) -> Length. %% cspan(String, Chars) -> Length. -spec span(String, Chars) -> Length when String :: string(), Chars :: string(), Length :: non_neg_integer(). span(S, Cs) when is_list(Cs) -> span(S, Cs, 0). span([C|S], Cs, I) -> case member(C, Cs) of true -> span(S, Cs, I+1); false -> I end; span([], _Cs, I) -> I. -spec cspan(String, Chars) -> Length when String :: string(), Chars :: string(), Length :: non_neg_integer(). cspan(S, Cs) when is_list(Cs) -> cspan(S, Cs, 0). cspan([C|S], Cs, I) -> case member(C, Cs) of true -> I; false -> cspan(S, Cs, I+1) end; cspan([], _Cs, I) -> I. %% substr(String, Start) %% substr(String, Start, Length) %% Extract a sub-string from String. -spec substr(String, Start) -> SubString when String :: string(), SubString :: string(), Start :: pos_integer(). substr(String, 1) when is_list(String) -> String; substr(String, S) when is_integer(S), S > 1 -> substr2(String, S). -spec substr(String, Start, Length) -> SubString when String :: string(), SubString :: string(), Start :: pos_integer(), Length :: non_neg_integer(). substr(String, S, L) when is_integer(S), S >= 1, is_integer(L), L >= 0 -> substr1(substr2(String, S), L). substr1([C|String], L) when L > 0 -> [C|substr1(String, L-1)]; substr1(String, _L) when is_list(String) -> []. %Be nice! substr2(String, 1) when is_list(String) -> String; substr2([_|String], S) -> substr2(String, S-1). %% tokens(String, Seperators). %% Return a list of tokens seperated by characters in Seperators. -spec tokens(String, SeparatorList) -> Tokens when String :: string(), SeparatorList :: string(), Tokens :: [Token :: nonempty_string()]. tokens(S, Seps) -> case Seps of [] -> case S of [] -> []; [_|_] -> [S] end; [C] -> tokens_single_1(lists:reverse(S), C, []); [_|_] -> tokens_multiple_1(lists:reverse(S), Seps, []) end. tokens_single_1([Sep|S], Sep, Toks) -> tokens_single_1(S, Sep, Toks); tokens_single_1([C|S], Sep, Toks) -> tokens_single_2(S, Sep, Toks, [C]); tokens_single_1([], _, Toks) -> Toks. tokens_single_2([Sep|S], Sep, Toks, Tok) -> tokens_single_1(S, Sep, [Tok|Toks]); tokens_single_2([C|S], Sep, Toks, Tok) -> tokens_single_2(S, Sep, Toks, [C|Tok]); tokens_single_2([], _Sep, Toks, Tok) -> [Tok|Toks]. tokens_multiple_1([C|S], Seps, Toks) -> case member(C, Seps) of true -> tokens_multiple_1(S, Seps, Toks); false -> tokens_multiple_2(S, Seps, Toks, [C]) end; tokens_multiple_1([], _Seps, Toks) -> Toks. tokens_multiple_2([C|S], Seps, Toks, Tok) -> case member(C, Seps) of true -> tokens_multiple_1(S, Seps, [Tok|Toks]); false -> tokens_multiple_2(S, Seps, Toks, [C|Tok]) end; tokens_multiple_2([], _Seps, Toks, Tok) -> [Tok|Toks]. -spec chars(Character, Number) -> String when Character :: char(), Number :: non_neg_integer(), String :: string(). chars(C, N) -> chars(C, N, []). -spec chars(Character, Number, Tail) -> String when Character :: char(), Number :: non_neg_integer(), Tail :: string(), String :: string(). chars(C, N, Tail) when N > 0 -> chars(C, N-1, [C|Tail]); chars(C, 0, Tail) when is_integer(C) -> Tail. %% Torbjörn's bit. %%% COPIES %%% -spec copies(String, Number) -> Copies when String :: string(), Copies :: string(), Number :: non_neg_integer(). copies(CharList, Num) when is_list(CharList), is_integer(Num), Num >= 0 -> copies(CharList, Num, []). copies(_CharList, 0, R) -> R; copies(CharList, Num, R) -> copies(CharList, Num-1, CharList++R). %%% WORDS %%% -spec words(String) -> Count when String :: string(), Count :: pos_integer(). words(String) -> words(String, $\s). -spec words(String, Character) -> Count when String :: string(), Character :: char(), Count :: pos_integer(). words(String, Char) when is_integer(Char) -> w_count(strip(String, both, Char), Char, 0). w_count([], _, Num) -> Num+1; w_count([H|T], H, Num) -> w_count(strip(T, left, H), H, Num+1); w_count([_H|T], Char, Num) -> w_count(T, Char, Num). %%% SUB_WORDS %%% -spec sub_word(String, Number) -> Word when String :: string(), Word :: string(), Number :: integer(). sub_word(String, Index) -> sub_word(String, Index, $\s). -spec sub_word(String, Number, Character) -> Word when String :: string(), Word :: string(), Number :: integer(), Character :: char(). sub_word(String, Index, Char) when is_integer(Index), is_integer(Char) -> case words(String, Char) of Num when Num < Index -> []; _Num -> s_word(strip(String, left, Char), Index, Char, 1, []) end. s_word([], _, _, _,Res) -> lists:reverse(Res); s_word([Char|_],Index,Char,Index,Res) -> lists:reverse(Res); s_word([H|T],Index,Char,Index,Res) -> s_word(T,Index,Char,Index,[H|Res]); s_word([Char|T],Stop,Char,Index,Res) when Index < Stop -> s_word(strip(T,left,Char),Stop,Char,Index+1,Res); s_word([_|T],Stop,Char,Index,Res) when Index < Stop -> s_word(T,Stop,Char,Index,Res). %%% STRIP %%% -spec strip(string()) -> string(). strip(String) -> strip(String, both). -spec strip(String, Direction) -> Stripped when String :: string(), Stripped :: string(), Direction :: 'left' | 'right' | 'both'. strip(String, left) -> strip_left(String, $\s); strip(String, right) -> strip_right(String, $\s); strip(String, both) -> strip_right(strip_left(String, $\s), $\s). -spec strip(String, Direction, Character) -> Stripped when String :: string(), Stripped :: string(), Direction :: 'left' | 'right' | 'both', Character :: char(). strip(String, right, Char) -> strip_right(String, Char); strip(String, left, Char) -> strip_left(String, Char); strip(String, both, Char) -> strip_right(strip_left(String, Char), Char). strip_left([Sc|S], Sc) -> strip_left(S, Sc); strip_left([_|_]=S, Sc) when is_integer(Sc) -> S; strip_left([], Sc) when is_integer(Sc) -> []. strip_right([Sc|S], Sc) -> case strip_right(S, Sc) of [] -> []; T -> [Sc|T] end; strip_right([C|S], Sc) -> [C|strip_right(S, Sc)]; strip_right([], Sc) when is_integer(Sc) -> []. %%% LEFT %%% -spec left(String, Number) -> Left when String :: string(), Left :: string(), Number :: non_neg_integer(). left(String, Len) when is_integer(Len) -> left(String, Len, $\s). -spec left(String, Number, Character) -> Left when String :: string(), Left :: string(), Number :: non_neg_integer(), Character :: char(). left(String, Len, Char) when is_integer(Char) -> Slen = erlang:length(String), if Slen > Len -> substr(String, 1, Len); Slen < Len -> l_pad(String, Len-Slen, Char); Slen =:= Len -> String end. l_pad(String, Num, Char) -> String ++ chars(Char, Num). %%% RIGHT %%% -spec right(String, Number) -> Right when String :: string(), Right :: string(), Number :: non_neg_integer(). right(String, Len) when is_integer(Len) -> right(String, Len, $\s). -spec right(String, Number, Character) -> Right when String :: string(), Right :: string(), Number :: non_neg_integer(), Character :: char(). right(String, Len, Char) when is_integer(Char) -> Slen = erlang:length(String), if Slen > Len -> substr(String, Slen-Len+1); Slen < Len -> r_pad(String, Len-Slen, Char); Slen =:= Len -> String end. r_pad(String, Num, Char) -> chars(Char, Num, String). %%% CENTRE %%% -spec centre(String, Number) -> Centered when String :: string(), Centered :: string(), Number :: non_neg_integer(). centre(String, Len) when is_integer(Len) -> centre(String, Len, $\s). -spec centre(String, Number, Character) -> Centered when String :: string(), Centered :: string(), Number :: non_neg_integer(), Character :: char(). centre(String, 0, Char) when is_list(String), is_integer(Char) -> []; % Strange cases to centre string centre(String, Len, Char) when is_integer(Char) -> Slen = erlang:length(String), if Slen > Len -> substr(String, (Slen-Len) div 2 + 1, Len); Slen < Len -> N = (Len-Slen) div 2, r_pad(l_pad(String, Len-(Slen+N), Char), N, Char); Slen =:= Len -> String end. %%% SUB_STRING %%% -spec sub_string(String, Start) -> SubString when String :: string(), SubString :: string(), Start :: pos_integer(). sub_string(String, Start) -> substr(String, Start). -spec sub_string(String, Start, Stop) -> SubString when String :: string(), SubString :: string(), Start :: pos_integer(), Stop :: pos_integer(). sub_string(String, Start, Stop) -> substr(String, Start, Stop - Start + 1). %% ISO/IEC 8859-1 (latin1) letters are converted, others are ignored %% to_lower_char(C) when is_integer(C), $A =< C, C =< $Z -> C + 32; to_lower_char(C) when is_integer(C), 16#C0 =< C, C =< 16#D6 -> C + 32; to_lower_char(C) when is_integer(C), 16#D8 =< C, C =< 16#DE -> C + 32; to_lower_char(C) -> C. to_upper_char(C) when is_integer(C), $a =< C, C =< $z -> C - 32; to_upper_char(C) when is_integer(C), 16#E0 =< C, C =< 16#F6 -> C - 32; to_upper_char(C) when is_integer(C), 16#F8 =< C, C =< 16#FE -> C - 32; to_upper_char(C) -> C. -spec to_lower(String) -> Result when String :: io_lib:latin1_string(), Result :: io_lib:latin1_string() ; (Char) -> CharResult when Char :: char(), CharResult :: char(). to_lower(S) when is_list(S) -> [to_lower_char(C) || C <- S]; to_lower(C) when is_integer(C) -> to_lower_char(C). -spec to_upper(String) -> Result when String :: io_lib:latin1_string(), Result :: io_lib:latin1_string() ; (Char) -> CharResult when Char :: char(), CharResult :: char(). to_upper(S) when is_list(S) -> [to_upper_char(C) || C <- S]; to_upper(C) when is_integer(C) -> to_upper_char(C). -spec join(StringList, Separator) -> String when StringList :: [string()], Separator :: string(), String :: string(). join([], Sep) when is_list(Sep) -> []; join([H|T], Sep) -> H ++ lists:append([Sep ++ X || X <- T]).