%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% A string library that works on grapheme clusters, with the exception
%% of codepoints of class 'prepend' and non modern (or decomposed) Hangul.
%% If these codepoints appear, functions like 'find/2' may return a string
%% which starts inside a grapheme cluster.
%% These exceptions are made because the codepoints classes are
%% seldom used and require that we are able look at previous codepoints in
%% the stream and is thus hard to implement effectively.
%%
%% GC (grapheme cluster) implies that the length of string 'ß↑e̊' is 3 though
%% it is represented by the codepoints [223,8593,101,778] or the
%% utf8 binary <<195,159,226,134,145,101,204,138>>
%%
%% And that searching for strings or graphemes finds the correct positions:
%%
%% find("eeeee̊eee", "e̊") -> "e̊ee".:
%% find("1£4e̊abcdef", "e") -> "ef"
%%
%% Most functions expect all input to be normalized to one form,
%% see unicode:characters_to_nfc and unicode:characters_to_nfd functions.
%% When appending strings no checking is done to verify that the
%% result is valid unicode strings.
%%
%% The functions may crash for invalid utf-8 input.
%%
%% Return value should be kept consistent when return type is
%% unicode:chardata() i.e. binary input => binary output,
%% list input => list output mixed input => mixed output
%%
-module(string).
-export([is_empty/1, length/1, to_graphemes/1,
reverse/1,
equal/2, equal/3, equal/4,
slice/2, slice/3,
pad/2, pad/3, pad/4, trim/1, trim/2, trim/3, chomp/1,
take/2, take/3, take/4,
lexemes/2, nth_lexeme/3,
uppercase/1, lowercase/1, titlecase/1,casefold/1,
prefix/2,
split/2,split/3,replace/3,replace/4,
find/2,find/3,
next_codepoint/1, next_grapheme/1
]).
-export([to_float/1, to_integer/1]).
%% Old (will be deprecated) lists/string API kept for backwards compability
-export([len/1, concat/2, % equal/2, (extended in the new api)
chr/2,rchr/2,str/2,rstr/2,
span/2,cspan/2,substr/2,substr/3, tokens/2,
chars/2,chars/3]).
-export([copies/2,words/1,words/2,strip/1,strip/2,strip/3,
sub_word/2,sub_word/3,left/2,left/3,right/2,right/3,
sub_string/2,sub_string/3,centre/2,centre/3, join/2]).
-export([to_upper/1, to_lower/1]).
%%
-import(lists,[member/2]).
-compile({no_auto_import,[length/1]}).
-compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}).
-define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r).
-export_type([grapheme_cluster/0]).
-type grapheme_cluster() :: char() | [char()].
-type direction() :: 'leading' | 'trailing'.
-dialyzer({no_improper_lists, [stack/2, length_b/3]}).
%%% BIFs internal (not documented) should not to be used outside of this module
%%% May be removed
-export([list_to_float/1, list_to_integer/1]).
%% Uses bifs: string:list_to_float/1 and string:list_to_integer/1
-spec list_to_float(String) -> {Float, Rest} | {'error', Reason} when
String :: string(),
Float :: float(),
Rest :: string(),
Reason :: 'no_float' | 'not_a_list'.
list_to_float(_) ->
erlang:nif_error(undef).
-spec list_to_integer(String) -> {Int, Rest} | {'error', Reason} when
String :: string(),
Int :: integer(),
Rest :: string(),
Reason :: 'no_integer' | 'not_a_list'.
list_to_integer(_) ->
erlang:nif_error(undef).
%%% End of BIFs
%% Check if string is the empty string
-spec is_empty(String::unicode:chardata()) -> boolean().
is_empty([]) -> true;
is_empty(<<>>) -> true;
is_empty([L|R]) -> is_empty(L) andalso is_empty(R);
is_empty(_) -> false.
%% Count the number of grapheme clusters in chardata
-spec length(String::unicode:chardata()) -> non_neg_integer().
length(<<CP1/utf8, Bin/binary>>) ->
length_b(Bin, CP1, 0);
length(CD) ->
length_1(CD, 0).
%% Convert a string to a list of grapheme clusters
-spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()].
to_graphemes(CD0) ->
case unicode_util:gc(CD0) of
[GC|CD] -> [GC|to_graphemes(CD)];
[] -> []
end.
%% Compare two strings return boolean, assumes that the input are
%% normalized to same form, see unicode:characters_to_nfX_xxx(..)
-spec equal(A, B) -> boolean() when
A::unicode:chardata(),
B::unicode:chardata().
equal(A,B) when is_binary(A), is_binary(B) ->
A =:= B;
equal(A,B) ->
equal_1(A,B).
%% Compare two strings return boolean, assumes that the input are
%% normalized to same form, see unicode:characters_to_nfX_xxx(..)
%% does casefold on the fly
-spec equal(A, B, IgnoreCase) -> boolean() when
A::unicode:chardata(),
B::unicode:chardata(),
IgnoreCase :: boolean().
equal(A, B, false) ->
equal(A,B);
equal(A, B, true) ->
equal_nocase(A,B).
%% Compare two strings return boolean
%% if specified does casefold and normalization on the fly
-spec equal(A, B, IgnoreCase, Norm) -> boolean() when
A :: unicode:chardata(),
B :: unicode:chardata(),
IgnoreCase :: boolean(),
Norm :: 'none' | 'nfc' | 'nfd' | 'nfkc' | 'nfkd'.
equal(A, B, Case, none) ->
equal(A,B,Case);
equal(A, B, false, Norm) ->
equal_norm(A, B, Norm);
equal(A, B, true, Norm) ->
equal_norm_nocase(A, B, Norm).
%% Reverse grapheme clusters
-spec reverse(String::unicode:chardata()) -> [grapheme_cluster()].
reverse(<<CP1/utf8, Rest/binary>>) ->
reverse_b(Rest, CP1, []);
reverse(CD) ->
reverse_1(CD, []).
%% Slice a string and return rest of string
%% Note: counts grapheme_clusters
-spec slice(String, Start) -> Slice when
String::unicode:chardata(),
Start :: non_neg_integer(),
Slice :: unicode:chardata().
slice(CD, N) when is_integer(N), N >= 0 ->
case slice_l0(CD, N) of
[] when is_binary(CD) -> <<>>;
Res -> Res
end.
-spec slice(String, Start, Length) -> Slice when
String::unicode:chardata(),
Start :: non_neg_integer(),
Length :: 'infinity' | non_neg_integer(),
Slice :: unicode:chardata().
slice(CD, N, Length)
when is_integer(N), N >= 0, is_integer(Length), Length > 0 ->
case slice_l0(CD, N) of
[] when is_binary(CD) -> <<>>;
L -> slice_trail(L, Length)
end;
slice(CD, N, infinity) ->
case slice_l0(CD, N) of
[] when is_binary(CD) -> <<>>;
Res -> Res
end;
slice(CD, _, 0) ->
case is_binary(CD) of
true -> <<>>;
false -> []
end.
%% Pad a string to desired length
-spec pad(String, Length) -> unicode:charlist() when
String ::unicode:chardata(),
Length :: integer().
pad(CD, Length) ->
pad(CD, Length, trailing, $\s).
-spec pad(String, Length, Dir) -> unicode:charlist() when
String ::unicode:chardata(),
Length :: integer(),
Dir :: direction() | 'both'.
pad(CD, Length, Dir) ->
pad(CD, Length, Dir, $\s).
-spec pad(String, Length, Dir, Char) -> unicode:charlist() when
String ::unicode:chardata(),
Length :: integer(),
Dir :: direction() | 'both',
Char :: grapheme_cluster().
pad(CD, Length, leading, Char) when is_integer(Length) ->
Len = length(CD),
[lists:duplicate(max(0, Length-Len), Char), CD];
pad(CD, Length, trailing, Char) when is_integer(Length) ->
Len = length(CD),
[CD|lists:duplicate(max(0, Length-Len), Char)];
pad(CD, Length, both, Char) when is_integer(Length) ->
Len = length(CD),
Size = max(0, Length-Len),
Pre = lists:duplicate(Size div 2, Char),
Post = case Size rem 2 of
1 -> [Char];
_ -> []
end,
[Pre, CD, Pre|Post].
%% Strip characters from whitespace or Separator in Direction
-spec trim(String) -> unicode:chardata() when
String :: unicode:chardata().
trim(Str) ->
trim(Str, both, unicode_util:whitespace()).
-spec trim(String, Dir) -> unicode:chardata() when
String :: unicode:chardata(),
Dir :: direction() | 'both'.
trim(Str, Dir) ->
trim(Str, Dir, unicode_util:whitespace()).
-spec trim(String, Dir, Characters) -> unicode:chardata() when
String :: unicode:chardata(),
Dir :: direction() | 'both',
Characters :: [grapheme_cluster()].
trim(Str, _, []) -> Str;
trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 ->
trim_ls(Str, Sep);
trim(Str, leading, Sep) when is_list(Sep) ->
trim_l(Str, Sep);
trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 ->
trim_ts(Str, Sep);
trim(Str, trailing, Seps0) when is_list(Seps0) ->
Seps = search_pattern(Seps0),
trim_t(Str, 0, Seps);
trim(Str, both, Sep) when is_list(Sep) ->
trim(trim(Str,leading,Sep), trailing, Sep).
%% Delete trailing newlines or \r\n
-spec chomp(String::unicode:chardata()) -> unicode:chardata().
chomp(Str) ->
trim(Str, trailing, [[$\r,$\n],$\n]).
%% Split String into two parts where the leading part consists of Characters
-spec take(String, Characters) -> {Leading, Trailing} when
String::unicode:chardata(),
Characters::[grapheme_cluster()],
Leading::unicode:chardata(),
Trailing::unicode:chardata().
take(Str, Sep) ->
take(Str, Sep, false, leading).
-spec take(String, Characters, Complement) -> {Leading, Trailing} when
String::unicode:chardata(),
Characters::[grapheme_cluster()],
Complement::boolean(),
Leading::unicode:chardata(),
Trailing::unicode:chardata().
take(Str, Sep, Complement) ->
take(Str, Sep, Complement, leading).
-spec take(String, Characters, Complement, Dir) -> {Leading, Trailing} when
String::unicode:chardata(),
Characters::[grapheme_cluster()],
Complement::boolean(),
Dir::direction(),
Leading::unicode:chardata(),
Trailing::unicode:chardata().
take(Str, [], Complement, Dir) ->
Empty = case is_binary(Str) of true -> <<>>; false -> [] end,
case {Complement,Dir} of
{false, leading} -> {Empty, Str};
{false, trailing} -> {Str, Empty};
{true, leading} -> {Str, Empty};
{true, trailing} -> {Empty, Str}
end;
take(Str, Sep, false, leading) ->
take_l(Str, Sep, []);
take(Str, Sep0, true, leading) ->
Sep = search_pattern(Sep0),
take_lc(Str, Sep, []);
take(Str, Sep0, false, trailing) ->
Sep = search_pattern(Sep0),
take_t(Str, 0, Sep);
take(Str, Sep0, true, trailing) ->
Sep = search_pattern(Sep0),
take_tc(Str, 0, Sep).
%% Uppercase all chars in Str
-spec uppercase(String::unicode:chardata()) -> unicode:chardata().
uppercase(CD) when is_list(CD) ->
try uppercase_list(CD, false)
catch unchanged -> CD
end;
uppercase(<<CP1/utf8, Rest/binary>>=Orig) ->
try uppercase_bin(CP1, Rest, false) of
List -> unicode:characters_to_binary(List)
catch unchanged -> Orig
end;
uppercase(<<>>) ->
<<>>.
%% Lowercase all chars in Str
-spec lowercase(String::unicode:chardata()) -> unicode:chardata().
lowercase(CD) when is_list(CD) ->
try lowercase_list(CD, false)
catch unchanged -> CD
end;
lowercase(<<CP1/utf8, Rest/binary>>=Orig) ->
try lowercase_bin(CP1, Rest, false) of
List -> unicode:characters_to_binary(List)
catch unchanged -> Orig
end;
lowercase(<<>>) ->
<<>>.
%% Make a titlecase of the first char in Str
-spec titlecase(String::unicode:chardata()) -> unicode:chardata().
titlecase(CD) when is_list(CD) ->
case unicode_util:titlecase(CD) of
[GC|Tail] -> append(GC,Tail);
Empty -> Empty
end;
titlecase(CD) when is_binary(CD) ->
case unicode_util:titlecase(CD) of
[CP|Chars] when is_integer(CP) -> <<CP/utf8,Chars/binary>>;
[CPs|Chars] ->
<< << <<CP/utf8>> || CP <- CPs>>/binary, Chars/binary>>;
[] -> <<>>
end.
%% Make a comparable string of the Str should be used for equality tests only
-spec casefold(String::unicode:chardata()) -> unicode:chardata().
casefold(CD) when is_list(CD) ->
try casefold_list(CD, false)
catch unchanged -> CD
end;
casefold(<<CP1/utf8, Rest/binary>>=Orig) ->
try casefold_bin(CP1, Rest, false) of
List -> unicode:characters_to_binary(List)
catch unchanged -> Orig
end;
casefold(<<>>) ->
<<>>.
-spec to_integer(String) -> {Int, Rest} | {'error', Reason} when
String :: unicode:chardata(),
Int :: integer(),
Rest :: unicode:chardata(),
Reason :: 'no_integer' | badarg.
to_integer(String) ->
try take(String, "+-0123456789") of
{Head, Tail} ->
case is_empty(Head) of
true -> {error, no_integer};
false ->
List = unicode:characters_to_list(Head),
case string:list_to_integer(List) of
{error, _} = Err -> Err;
{Int, Rest} ->
to_number(String, Int, Rest, List, Tail)
end
end
catch _:_ -> {error, badarg}
end.
-spec to_float(String) -> {Float, Rest} | {'error', Reason} when
String :: unicode:chardata(),
Float :: float(),
Rest :: unicode:chardata(),
Reason :: 'no_float' | 'badarg'.
to_float(String) ->
try take(String, "+-0123456789eE.,") of
{Head, Tail} ->
case is_empty(Head) of
true -> {error, no_float};
false ->
List = unicode:characters_to_list(Head),
case string:list_to_float(List) of
{error, _} = Err -> Err;
{Float, Rest} ->
to_number(String, Float, Rest, List, Tail)
end
end
catch _:_ -> {error, badarg}
end.
to_number(String, Number, Rest, List, _Tail) when is_binary(String) ->
BSz = erlang:length(List)-erlang:length(Rest),
<<_:BSz/binary, Cont/binary>> = String,
{Number, Cont};
to_number(_, Number, Rest, _, Tail) ->
{Number, concat(Rest,Tail)}.
%% Return the remaining string with prefix removed or else nomatch
-spec prefix(String::unicode:chardata(), Prefix::unicode:chardata()) ->
'nomatch' | unicode:chardata().
prefix(Str, Prefix0) ->
Result = case unicode:characters_to_list(Prefix0) of
[] -> Str;
Prefix -> prefix_1(Str, Prefix)
end,
case Result of
[] when is_binary(Str) -> <<>>;
Res -> Res
end.
%% split String with the first occurrence of SearchPattern, return list of splits
-spec split(String, SearchPattern) -> [unicode:chardata()] when
String :: unicode:chardata(),
SearchPattern :: unicode:chardata().
split(String, SearchPattern) ->
split(String, SearchPattern, leading).
%% split String with SearchPattern, return list of splits
-spec split(String, SearchPattern, Where) -> [unicode:chardata()] when
String :: unicode:chardata(),
SearchPattern :: unicode:chardata(),
Where :: direction() | 'all'.
split(String, SearchPattern, Where) ->
case is_empty(SearchPattern) of
true -> [String];
false ->
SearchPatternCPs = unicode:characters_to_list(SearchPattern),
case split_1(String, SearchPatternCPs, 0, Where, [], []) of
{_Curr, []} -> [String];
{_Curr, Acc} when Where =:= trailing -> Acc;
{Curr, Acc} when Where =:= all -> lists:reverse([Curr|Acc]);
Acc when is_list(Acc) -> Acc
end
end.
%% Replace the first SearchPattern in String with Replacement
-spec replace(String, SearchPattern, Replacement) ->
[unicode:chardata()] when
String :: unicode:chardata(),
SearchPattern :: unicode:chardata(),
Replacement :: unicode:chardata().
replace(String, SearchPattern, Replacement) ->
lists:join(Replacement, split(String, SearchPattern)).
%% Replace Where SearchPattern in String with Replacement
-spec replace(String, SearchPattern, Replacement, Where) ->
[unicode:chardata()] when
String :: unicode:chardata(),
SearchPattern :: unicode:chardata(),
Replacement :: unicode:chardata(),
Where :: direction() | 'all'.
replace(String, SearchPattern, Replacement, Where) ->
lists:join(Replacement, split(String, SearchPattern, Where)).
%% Split Str into a list of chardata separated by one of the grapheme
%% clusters in Seps
-spec lexemes(String::unicode:chardata(),
SeparatorList::[grapheme_cluster()]) ->
[unicode:chardata()].
lexemes([], _) -> [];
lexemes(Str, []) -> [Str];
lexemes(Str, Seps0) when is_list(Seps0) ->
Seps = search_pattern(Seps0),
lexemes_m(Str, Seps, []).
-spec nth_lexeme(String, N, SeparatorList) -> unicode:chardata() when
String::unicode:chardata(),
N::non_neg_integer(),
SeparatorList::[grapheme_cluster()].
nth_lexeme(Str, 1, []) -> Str;
nth_lexeme(Str, N, Seps0) when is_list(Seps0), is_integer(N), N > 0 ->
Seps = search_pattern(Seps0),
nth_lexeme_m(Str, Seps, N).
%% find first SearchPattern in String return rest of string
-spec find(String, SearchPattern) -> unicode:chardata() | 'nomatch' when
String::unicode:chardata(),
SearchPattern::unicode:chardata().
find(String, SearchPattern) ->
find(String, SearchPattern, leading).
%% find SearchPattern in String (search in Dir direction) return rest of string
-spec find(String, SearchPattern, Dir) -> unicode:chardata() | 'nomatch' when
String::unicode:chardata(),
SearchPattern::unicode:chardata(),
Dir::direction().
find(String, "", _) -> String;
find(String, <<>>, _) -> String;
find(String, SearchPattern, leading) ->
find_l(String, unicode:characters_to_list(SearchPattern));
find(String, SearchPattern, trailing) ->
find_r(String, unicode:characters_to_list(SearchPattern), nomatch).
%% Fetch first grapheme cluster and return rest in tail
-spec next_grapheme(String::unicode:chardata()) ->
maybe_improper_list(grapheme_cluster(),unicode:chardata()) |
{error,unicode:chardata()}.
next_grapheme(CD) -> unicode_util:gc(CD).
%% Fetch first codepoint and return rest in tail
-spec next_codepoint(String::unicode:chardata()) ->
maybe_improper_list(char(),unicode:chardata()) |
{error,unicode:chardata()}.
next_codepoint(CD) -> unicode_util:cp(CD).
%% Internals
length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) ->
length_1(Cont, N+1);
length_1(Str, N) ->
case unicode_util:gc(Str) of
[] -> N;
[_|Rest] -> length_1(Rest, N+1)
end.
length_b(<<CP2/utf8, Rest/binary>>, CP1, N)
when ?ASCII_LIST(CP1,CP2) ->
length_b(Rest, CP2, N+1);
length_b(Bin0, CP1, N) ->
[_|Bin1] = unicode_util:gc([CP1|Bin0]),
case unicode_util:cp(Bin1) of
[] -> N+1;
[CP3|Bin] -> length_b(Bin, CP3, N+1)
end.
equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
A =:= B andalso equal_1(AR, BR);
equal_1([], BR) -> is_empty(BR);
equal_1(A0,B0) ->
case {unicode_util:cp(A0), unicode_util:cp(B0)} of
{[CP|A],[CP|B]} -> equal_1(A,B);
{[], []} -> true;
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
equal_nocase(A, A) -> true;
equal_nocase(A0, B0) ->
case {unicode_util:cp(unicode_util:casefold(A0)),
unicode_util:cp(unicode_util:casefold(B0))} of
{[CP|A],[CP|B]} -> equal_nocase(A,B);
{[], []} -> true;
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
equal_norm(A, A, _Norm) -> true;
equal_norm(A0, B0, Norm) ->
case {unicode_util:cp(unicode_util:Norm(A0)),
unicode_util:cp(unicode_util:Norm(B0))} of
{[CP|A],[CP|B]} -> equal_norm(A,B, Norm);
{[], []} -> true;
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
equal_norm_nocase(A, A, _Norm) -> true;
equal_norm_nocase(A0, B0, Norm) ->
case {unicode_util:cp(unicode_util:casefold(unicode_util:Norm(A0))),
unicode_util:cp(unicode_util:casefold(unicode_util:Norm(B0)))} of
{[CP|A],[CP|B]} -> equal_norm_nocase(A,B, Norm);
{[], []} -> true;
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) ->
reverse_1(Cont, [CP1|Acc]);
reverse_1(CD, Acc) ->
case unicode_util:gc(CD) of
[GC|Rest] -> reverse_1(Rest, [GC|Acc]);
[] -> Acc
end.
reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc)
when ?ASCII_LIST(CP1,CP2) ->
reverse_b(Rest, CP2, [CP1|Acc]);
reverse_b(Bin0, CP1, Acc) ->
[GC|Bin1] = unicode_util:gc([CP1|Bin0]),
case unicode_util:cp(Bin1) of
[] -> [GC|Acc];
[CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc])
end.
slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 ->
slice_lb(Bin, CP1, N);
slice_l0(L, N) ->
slice_l(L, N).
slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
slice_l(Cont, N-1);
slice_l(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
[_|Cont] -> slice_l(Cont, N-1);
[] -> []
end;
slice_l(Cont, 0) ->
Cont.
slice_lb(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 ->
slice_lb(Bin, CP2, N-1);
slice_lb(Bin, CP1, N) ->
[_|Rest] = unicode_util:gc([CP1|Bin]),
if N > 1 ->
case unicode_util:cp(Rest) of
[CP2|Cont] -> slice_lb(Cont, CP2, N-1);
[] -> <<>>
end;
N =:= 1 ->
Rest
end.
slice_trail(Orig, N) when is_binary(Orig) ->
case Orig of
<<CP1/utf8, Bin/binary>> when N > 0 ->
Length = slice_bin(Bin, CP1, N),
Sz = byte_size(Orig) - Length,
<<Keep:Sz/binary, _/binary>> = Orig,
Keep;
_ -> <<>>
end;
slice_trail(CD, N) when is_list(CD) ->
slice_list(CD, N).
slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
[CP1|slice_list(Cont, N-1)];
slice_list(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
[GC|Cont] -> append(GC, slice_list(Cont, N-1));
[] -> []
end;
slice_list(_, 0) ->
[].
slice_bin(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 ->
slice_bin(Bin, CP2, N-1);
slice_bin(CD, CP1, N) when N > 0 ->
[_|Bin] = unicode_util:gc([CP1|CD]),
case unicode_util:cp(Bin) of
[CP2|Cont] -> slice_bin(Cont, CP2, N-1);
[] -> 0
end;
slice_bin(CD, CP1, 0) ->
byte_size(CD)+byte_size(<<CP1/utf8>>).
uppercase_list([CP1|[CP2|_]=Cont], _Changed) when $a =< CP1, CP1 =< $z, CP2 < 256 ->
[CP1-32|uppercase_list(Cont, true)];
uppercase_list([CP1|[CP2|_]=Cont], Changed) when CP1 < 128, CP2 < 256 ->
[CP1|uppercase_list(Cont, Changed)];
uppercase_list([], true) ->
[];
uppercase_list([], false) ->
throw(unchanged);
uppercase_list(CPs0, Changed) ->
case unicode_util:uppercase(CPs0) of
[Char|CPs] when Char =:= hd(CPs0) -> [Char|uppercase_list(CPs, Changed)];
[Char|CPs] -> append(Char,uppercase_list(CPs, true));
[] -> uppercase_list([], Changed)
end.
uppercase_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed)
when $a =< CP1, CP1 =< $z, CP2 < 256 ->
[CP1-32|uppercase_bin(CP2, Bin, true)];
uppercase_bin(CP1, <<CP2/utf8, Bin/binary>>, Changed)
when CP1 < 128, CP2 < 256 ->
[CP1|uppercase_bin(CP2, Bin, Changed)];
uppercase_bin(CP1, Bin, Changed) ->
case unicode_util:uppercase([CP1|Bin]) of
[CP1|CPs] ->
case unicode_util:cp(CPs) of
[Next|Rest] ->
[CP1|uppercase_bin(Next, Rest, Changed)];
[] when Changed ->
[CP1];
[] ->
throw(unchanged)
end;
[Char|CPs] ->
case unicode_util:cp(CPs) of
[Next|Rest] ->
[Char|uppercase_bin(Next, Rest, true)];
[] ->
[Char]
end
end.
lowercase_list([CP1|[CP2|_]=Cont], _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 ->
[CP1+32|lowercase_list(Cont, true)];
lowercase_list([CP1|[CP2|_]=Cont], Changed) when CP1 < 128, CP2 < 256 ->
[CP1|lowercase_list(Cont, Changed)];
lowercase_list([], true) ->
[];
lowercase_list([], false) ->
throw(unchanged);
lowercase_list(CPs0, Changed) ->
case unicode_util:lowercase(CPs0) of
[Char|CPs] when Char =:= hd(CPs0) -> [Char|lowercase_list(CPs, Changed)];
[Char|CPs] -> append(Char,lowercase_list(CPs, true));
[] -> lowercase_list([], Changed)
end.
lowercase_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed)
when $A =< CP1, CP1 =< $Z, CP2 < 256 ->
[CP1+32|lowercase_bin(CP2, Bin, true)];
lowercase_bin(CP1, <<CP2/utf8, Bin/binary>>, Changed)
when CP1 < 128, CP2 < 256 ->
[CP1|lowercase_bin(CP2, Bin, Changed)];
lowercase_bin(CP1, Bin, Changed) ->
case unicode_util:lowercase([CP1|Bin]) of
[CP1|CPs] ->
case unicode_util:cp(CPs) of
[Next|Rest] ->
[CP1|lowercase_bin(Next, Rest, Changed)];
[] when Changed ->
[CP1];
[] ->
throw(unchanged)
end;
[Char|CPs] ->
case unicode_util:cp(CPs) of
[Next|Rest] ->
[Char|lowercase_bin(Next, Rest, true)];
[] ->
[Char]
end
end.
casefold_list([CP1|[CP2|_]=Cont], _Changed) when $A =< CP1, CP1 =< $Z, CP2 < 256 ->
[CP1+32|casefold_list(Cont, true)];
casefold_list([CP1|[CP2|_]=Cont], Changed) when CP1 < 128, CP2 < 256 ->
[CP1|casefold_list(Cont, Changed)];
casefold_list([], true) ->
[];
casefold_list([], false) ->
throw(unchanged);
casefold_list(CPs0, Changed) ->
case unicode_util:casefold(CPs0) of
[Char|CPs] when Char =:= hd(CPs0) -> [Char|casefold_list(CPs, Changed)];
[Char|CPs] -> append(Char,casefold_list(CPs, true));
[] -> casefold_list([], Changed)
end.
casefold_bin(CP1, <<CP2/utf8, Bin/binary>>, _Changed)
when $A =< CP1, CP1 =< $Z, CP2 < 256 ->
[CP1+32|casefold_bin(CP2, Bin, true)];
casefold_bin(CP1, <<CP2/utf8, Bin/binary>>, Changed)
when CP1 < 128, CP2 < 256 ->
[CP1|casefold_bin(CP2, Bin, Changed)];
casefold_bin(CP1, Bin, Changed) ->
case unicode_util:casefold([CP1|Bin]) of
[CP1|CPs] ->
case unicode_util:cp(CPs) of
[Next|Rest] ->
[CP1|casefold_bin(Next, Rest, Changed)];
[] when Changed ->
[CP1];
[] ->
throw(unchanged)
end;
[Char|CPs] ->
case unicode_util:cp(CPs) of
[Next|Rest] ->
[Char|casefold_bin(Next, Rest, true)];
[] ->
[Char]
end
end.
%% Fast path for ascii searching for one character in lists
trim_ls([CP1|[CP2|_]=Cont]=Str, Sep)
when ?ASCII_LIST(CP1,CP2) ->
case Sep of
CP1 -> trim_ls(Cont, Sep);
_ -> Str
end;
trim_ls(Str, Sep) ->
trim_l(Str, [Sep]).
trim_l([CP1|[CP2|_]=Cont]=Str, Sep)
when ?ASCII_LIST(CP1,CP2) ->
case lists:member(CP1, Sep) of
true -> trim_l(Cont, Sep);
false -> Str
end;
trim_l([Bin|Cont0], Sep) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, Sep) of
{nomatch, Cont} -> trim_l(Cont, Sep);
Keep -> Keep
end;
trim_l(Str, Sep) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
case lists:member(C, Sep) of
true -> trim_l(Cs, Sep);
false -> Str
end;
[] -> []
end;
trim_l(Bin, Sep) when is_binary(Bin) ->
case bin_search_inv(Bin, [], Sep) of
{nomatch,_} -> <<>>;
[Keep] -> Keep
end.
%% Fast path for ascii searching for one character in lists
trim_ts([Sep|Cs1]=Str, Sep) ->
case Cs1 of
[] -> [];
[CP2|_] when ?ASCII_LIST(Sep,CP2) ->
Tail = trim_ts(Cs1, Sep),
case is_empty(Tail) of
true -> [];
false -> [Sep|Tail]
end;
_ ->
trim_t(Str, 0, search_pattern([Sep]))
end;
trim_ts([CP|Cont],Sep) when is_integer(CP) ->
[CP|trim_ts(Cont, Sep)];
trim_ts(Str, Sep) ->
trim_t(Str, 0, search_pattern([Sep])).
trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
case lists:member(CP1, CPs) of
true ->
[GC|Cs1] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
true ->
Tail = trim_t(Cs1, 0, Seps),
case is_empty(Tail) of
true -> [];
false -> append(GC,Tail)
end;
false ->
append(GC,trim_t(Cs1, 0, Seps))
end;
false ->
[CP1|trim_t(Cont, 0, Seps)]
end;
trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
Seps = search_compile(Seps0),
case bin_search(Rest, Cont0, Seps) of
{nomatch,_} ->
stack(Bin, trim_t(Cont0, 0, Seps));
[SepStart|Cont1] ->
case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
Tail = trim_t(Cont, 0, Seps),
case is_empty(Tail) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, _/binary>> = Bin,
Keep;
false ->
Used = cp_prefix(Cont0, Cont),
stack(Bin, stack(Used, Tail))
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
trim_t([Bin|Cont], KeepSz, Seps)
end
end;
trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
case unicode_util:gc(Str) of
[GC|Cs1] ->
case lists:member(GC, GCs) of
true ->
Tail = trim_t(Cs1, 0, Seps),
case is_empty(Tail) of
true -> [];
false -> append(GC,Tail)
end;
false ->
append(GC,trim_t(Cs1, 0, Seps))
end;
[] -> []
end;
trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
Seps = search_compile(Seps0),
case bin_search(Rest, [], Seps) of
{nomatch,_} -> Bin;
[SepStart] ->
case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, _/binary>> = Bin,
Keep;
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
trim_t(Bin, KeepSz, Seps)
end
end.
take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc)
when ?ASCII_LIST(CP1,CP2) ->
case lists:member(CP1, Seps) of
true -> take_l(Cont, Seps, [CP1|Acc]);
false -> {rev(Acc), Str}
end;
take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
take_l(Str, Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
case lists:member(C, Seps) of
true -> take_l(Cs, Seps, append(rev(C),Acc));
false -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
take_l(Bin, Seps, Acc) when is_binary(Bin) ->
case bin_search_inv(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
First = byte_size(Bin) - byte_size(After),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep, Acc), After}
end.
take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) ->
case lists:member(CP1, CPs) of
true ->
[GC|Str] = unicode_util:gc(Str0),
case lists:member(GC, GCs) of
false -> take_lc(Str, Seps, append(rev(GC),Acc));
true -> {rev(Acc), Str0}
end;
false ->
take_lc(Cont, Seps, append(CP1,Acc))
end;
take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) ->
Seps = search_compile(Seps0),
case bin_search(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
case lists:member(C, GCs) of
false -> take_lc(Cs, Seps, append(rev(C),Acc));
true -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
take_lc(Bin, Seps0, Acc) when is_binary(Bin) ->
Seps = search_compile(Seps0),
case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
First = byte_size(Bin) - byte_size(After),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep, Acc), After}
end.
take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
case lists:member(CP1, CPs) of
true ->
[GC|Str] = unicode_util:gc(Str0),
case lists:member(GC, GCs) of
true ->
{Head, Tail} = take_t(Str, 0, Seps),
case is_empty(Head) of
true -> {Head, append(GC,Tail)};
false -> {append(GC,Head), Tail}
end;
false ->
{Head, Tail} = take_t(Str, 0, Seps),
{append(GC,Head), Tail}
end;
false ->
{Head, Tail} = take_t(Cont, 0, Seps),
{[CP1|Head], Tail}
end;
take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
Seps = search_compile(Seps0),
case bin_search(Rest, Cont0, Seps) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
{Head, Tail} = take_t(Cont, 0, Seps),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
{Head, Tail} = take_t(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
{Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
take_t([Bin|Cont], KeepSz, Seps)
end
end;
take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
case unicode_util:gc(Str) of
[GC|Cs1] ->
case lists:member(GC, GCs) of
true ->
{Head, Tail} = take_t(Cs1, 0, Seps),
case is_empty(Head) of
true -> {Head, append(GC,Tail)};
false -> {append(GC,Head), Tail}
end;
false ->
{Head, Tail} = take_t(Cs1, 0, Seps),
{append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
Seps = search_compile(Seps0),
case bin_search(Rest, [], Seps) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
take_t(Bin, KeepSz, Seps)
end
end.
take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) ->
case lists:member(CP1, GCs) of
false ->
{Head, Tail} = take_tc(Cont, 0, Seps),
case is_empty(Head) of
true -> {Head, append(CP1,Tail)};
false -> {append(CP1,Head), Tail}
end;
true ->
{Head, Tail} = take_tc(Cont, 0, Seps),
{append(CP1,Head), Tail}
end;
take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
case bin_search_inv(Rest, Cont0, GCs) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
{Head, Tail} = take_tc(Cont, 0, Seps0),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
Seps = search_compile(Seps0),
case bin_search(SepStart, Cont1, Seps) of
{nomatch, Cont} ->
{Head, Tail} = take_tc(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
{Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
take_tc([Bin|Cont], KeepSz, Seps)
end
end;
take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
case unicode_util:gc(Str) of
[GC|Cs1] ->
case lists:member(GC, GCs) of
false ->
{Head, Tail} = take_tc(Cs1, 0, Seps),
case is_empty(Head) of
true -> {Head, append(GC,Tail)};
false -> {append(GC,Head), Tail}
end;
true ->
{Head, Tail} = take_tc(Cs1, 0, Seps),
{append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
case bin_search_inv(Rest, [], GCs) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
Seps = search_compile(Seps0),
case bin_search(SepStart, [], Seps) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
take_tc(Bin, KeepSz, Seps)
end
end.
prefix_1(Cs0, [GC]) ->
case unicode_util:gc(Cs0) of
[GC|Cs] -> Cs;
_ -> nomatch
end;
prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) ->
case CP =:= Pre of
true -> prefix_1(Cs,PreR);
false -> nomatch
end;
prefix_1(<<CP/utf8, Cs/binary>>, [Pre|PreR]) ->
case CP =:= Pre of
true -> prefix_1(Cs,PreR);
false -> nomatch
end;
prefix_1(Cs0, [Pre|PreR]) ->
case unicode_util:cp(Cs0) of
[Pre|Cs] -> prefix_1(Cs,PreR);
_ -> nomatch
end.
split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) ->
case CP1=:=C of
true ->
case prefix_1(Cs0, Needle) of
nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
Rest when Where =:= leading ->
[rev(Curr), Rest];
Rest when Where =:= trailing ->
split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
Rest when Where =:= all ->
split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
end;
false ->
split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc)
end;
split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc)
when is_binary(Bin) ->
case bin_search_str(Bin, Start, Cont0, Needle) of
{nomatch,Sz,Cont} ->
<<Keep:Sz/binary, _/binary>> = Bin,
split_1(Cont, Needle, 0, Where, [Keep|Curr0], Acc);
{Before, [Cs0|Cont], After} ->
Curr = add_non_empty(Before,Curr0),
case Where of
leading ->
[rev(Curr),After];
trailing ->
<<_/utf8, Cs/binary>> = Cs0,
Next = byte_size(Bin) - byte_size(Cs),
split_1([Bin|Cont], Needle, Next, Where,
Curr0, [rev(Curr),After]);
all ->
split_1(After, Needle, 0, Where, [], [rev(Curr)|Acc])
end
end;
split_1(Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
[C|Cs] ->
case prefix_1(Cs0, Needle) of
nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
Rest when Where =:= leading ->
[rev(Curr), Rest];
Rest when Where =:= trailing ->
split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
Rest when Where =:= all ->
split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
end;
[Other|Cs] ->
split_1(Cs, Needle, 0, Where, append(Other,Curr), Acc);
[] ->
{rev(Curr), Acc}
end;
split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) ->
case bin_search_str(Bin, Start, [], Needle) of
{nomatch,_,_} ->
<<_:Start/binary, Keep/binary>> = Bin,
{rev([Keep|Curr0]), Acc};
{Before, [Cs0], After} ->
case Where of
leading ->
[rev([Before|Curr0]),After];
trailing ->
<<_/utf8, Cs/binary>> = Cs0,
Next = byte_size(Bin) - byte_size(Cs),
split_1(Bin, Needle, Next, Where, Curr0,
[btoken(Before,Curr0),After]);
all ->
Next = byte_size(Bin) - byte_size(After),
<<_:Start/binary, Keep/binary>> = Before,
Curr = [Keep|Curr0],
split_1(Bin, Needle, Next, Where, [], [rev(Curr)|Acc])
end
end.
lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps, Ts) when is_integer(CP) ->
case lists:member(CP, CPs) of
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
true ->
lexemes_m(Cs2, Seps, Ts);
false ->
{Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
false ->
{Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
lexemes_m(Cont, Seps0, Ts);
Cs ->
Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
lexemes_m(Cs, Seps0, Ts);
false ->
Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
[] ->
lists:reverse(Ts)
end;
lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
case bin_search_inv(Bin, [], GCs) of
{nomatch,_} ->
lists:reverse(Ts);
[Cs] ->
Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts))
end.
lexeme_pick([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps, Tkn) when is_integer(CP) ->
case lists:member(CP, CPs) of
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
true -> {rev(Tkn), Cs2};
false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
end;
false -> lexeme_pick(Cs1, Seps, [CP|Tkn])
end;
lexeme_pick([Bin|Cont0], Seps, Tkn) when is_binary(Bin) ->
case bin_search(Bin, Cont0, Seps) of
{nomatch,_} ->
lexeme_pick(Cont0, Seps, [Bin|Tkn]);
[Left|_Cont] = Cs ->
Bytes = byte_size(Bin) - byte_size(Left),
<<Lexeme:Bytes/binary, _/binary>> = Bin,
{btoken(Lexeme, Tkn), Cs}
end;
lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
[CP|Cs] ->
case lists:member(CP, CPs) of
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
true -> {rev(Tkn), Cs2};
false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
end;
false ->
lexeme_pick(Cs, Seps, append(CP,Tkn))
end;
[] ->
{rev(Tkn), []}
end;
lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin,Tkn), []};
[Left] ->
Bytes = byte_size(Bin) - byte_size(Left),
<<Lexeme:Bytes/binary, _/binary>> = Bin,
{btoken(Lexeme, Tkn), Left}
end.
nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
nth_lexeme_m(Cont, Seps0, N);
Cs when N > 1 ->
Rest = lexeme_skip(Cs, Seps0),
nth_lexeme_m(Rest, Seps0, N-1);
Cs ->
Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs, Seps, []),
Lexeme
end;
nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
nth_lexeme_m(Cs, Seps0, N);
false when N > 1 ->
Cs1 = lexeme_skip(Cs, Seps0),
nth_lexeme_m(Cs1, Seps0, N-1);
false ->
Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs0, Seps, []),
Lexeme
end;
[] ->
[]
end;
nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
Seps = search_compile(Seps0),
case bin_search_inv(Bin, [], GCs) of
[Cs] when N > 1 ->
Cs1 = lexeme_skip(Cs, Seps),
nth_lexeme_m(Cs1, Seps, N-1);
[Cs] ->
{Lexeme,_} = lexeme_pick(Cs, Seps, []),
Lexeme;
{nomatch,_} ->
<<>>
end.
lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) ->
case lists:member(CP, CPs) of
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
lexeme_skip(Cs1, Seps)
end;
lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) ->
Seps = search_compile(Seps0),
case bin_search(Bin, Cont0, Seps) of
{nomatch,_} -> lexeme_skip(Cont0, Seps);
Cs -> tl(unicode_util:gc(Cs))
end;
lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
[CP|Cs] ->
case lists:member(CP, CPs) of
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
lexeme_skip(Cs, Seps)
end;
[] ->
[]
end;
lexeme_skip(Bin, Seps0) when is_binary(Bin) ->
Seps = search_compile(Seps0),
case bin_search(Bin, [], Seps) of
{nomatch,_} -> <<>>;
[Left] -> tl(unicode_util:gc(Left))
end.
find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) ->
case C1 of
C ->
case prefix_1(Cs0, Needle) of
nomatch -> find_l(Cs, Needle);
_ -> Cs0
end;
_ ->
find_l(Cs, Needle)
end;
find_l([Bin|Cont0], Needle) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch, _, Cont} ->
find_l(Cont, Needle);
{_Before, Cs, _After} ->
Cs
end;
find_l(Cs0, [C|_]=Needle) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
[C|Cs] ->
case prefix_1(Cs0, Needle) of
nomatch -> find_l(Cs, Needle);
_ -> Cs0
end;
[_C|Cs] ->
find_l(Cs, Needle);
[] -> nomatch
end;
find_l(Bin, Needle) ->
case bin_search_str(Bin, 0, [], Needle) of
{nomatch,_,_} -> nomatch;
{_Before, [Cs], _After} -> Cs
end.
find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) ->
case Cp of
C ->
case prefix_1(Cs0, Needle) of
nomatch -> find_r(Cs, Needle, Res);
_ -> find_r(Cs, Needle, Cs0)
end;
_ ->
find_r(Cs, Needle, Res)
end;
find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch,_,Cont} ->
find_r(Cont, Needle, Res);
{_, Cs0, _} ->
[_|Cs] = unicode_util:gc(Cs0),
find_r(Cs, Needle, Cs0)
end;
find_r(Cs0, [C|_]=Needle, Res) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
[C|Cs] ->
case prefix_1(Cs0, Needle) of
nomatch -> find_r(Cs, Needle, Res);
_ -> find_r(Cs, Needle, Cs0)
end;
[_C|Cs] ->
find_r(Cs, Needle, Res);
[] -> Res
end;
find_r(Bin, Needle, Res) ->
case bin_search_str(Bin, 0, [], Needle) of
{nomatch,_,_} -> Res;
{_Before, [Cs0], _After} ->
<<_/utf8, Cs/binary>> = Cs0,
find_r(Cs, Needle, Cs0)
end.
%% These are used to avoid creating lists around binaries
%% might be unnecessary, is there a better solution?
btoken(Token, []) -> Token;
btoken(BinPart, [C]) when is_integer(C) -> <<C/utf8, BinPart/binary>>;
btoken(<<>>, Tkn) -> lists:reverse(Tkn);
btoken(BinPart, Cs) -> [lists:reverse(Cs),BinPart].
rev([B]) when is_binary(B) -> B;
rev(L) when is_list(L) -> lists:reverse(L);
rev(C) when is_integer(C) -> C.
append(Char, <<>>) when is_integer(Char) -> [Char];
append(Char, <<>>) when is_list(Char) -> Char;
append(Char, Bin) when is_binary(Bin) -> [Char,Bin];
append(Char, Str) when is_integer(Char) -> [Char|Str];
append(GC, Str) when is_list(GC) -> GC ++ Str.
stack(Bin, []) -> Bin;
stack(<<>>, St) -> St;
stack([], St) -> St;
stack(Bin, St) -> [Bin|St].
add_non_empty(<<>>, L) -> L;
add_non_empty(Token, L) -> [Token|L].
cp_prefix(Orig, Cont) ->
case unicode_util:cp(Cont) of
[] -> Orig;
[Cp|Rest] -> cp_prefix_1(Orig, Cp, Rest)
end.
cp_prefix_1(Orig, Until, Cont) ->
case unicode_util:cp(Orig) of
[Until|Rest] ->
case equal(Rest, Cont) of
true -> [];
false-> [Until|cp_prefix_1(Rest, Until, Cont)]
end;
[CP|Rest] -> [CP|cp_prefix_1(Rest, Until, Cont)]
end.
%% Binary special
bin_search(Bin, Cont, {Seps,_,BP}) ->
bin_search_loop(Bin, 0, BP, Cont, Seps).
%% Need to work with [<<$a>>, <<778/utf8>>],
%% i.e. å in nfd form $a "COMBINING RING ABOVE"
%% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>>
%% combined with other characters are currently ignored.
search_pattern({_,_,_}=P) -> P;
search_pattern(Seps) ->
CPs = search_cp(Seps),
{Seps, CPs, undefined}.
search_compile({Sep, CPs, undefined}) ->
{Sep, CPs, binary:compile_pattern(bin_pattern(CPs))};
search_compile({_,_,_}=Compiled) -> Compiled.
search_cp([CP|Seps]) when is_integer(CP) ->
[CP|search_cp(Seps)];
search_cp([Pattern|Seps]) ->
[CP|_] = unicode_util:cp(Pattern),
[CP|search_cp(Seps)];
search_cp([]) -> [].
bin_pattern([CP|Seps]) ->
[<<CP/utf8>>|bin_pattern(Seps)];
bin_pattern([]) -> [].
bin_search_loop(Bin0, Start, _, Cont, _Seps)
when byte_size(Bin0) =< Start; Start < 0 ->
{nomatch, Cont};
bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
<<_:Start/binary, Bin/binary>> = Bin0,
case binary:match(Bin, BinSeps) of
nomatch ->
{nomatch,Cont};
{Where, _CL} when Cont =:= [] ->
<<_:Where/binary, Cont1/binary>> = Bin,
[GC|Cont2] = unicode_util:gc(Cont1),
case lists:member(GC, Seps) of
false when Cont2 =:= [] ->
{nomatch, []};
false ->
Next = byte_size(Bin0) - byte_size(Cont2),
bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
true ->
[Cont1]
end;
{Where, _CL} ->
<<_:Where/binary, Cont0/binary>> = Bin,
Cont1 = [Cont0|Cont],
[GC|Cont2] = unicode_util:gc(Cont1),
case lists:member(GC, Seps) of
false ->
case Cont2 of
[BinR|Cont] when is_binary(BinR) ->
Next = byte_size(Bin0) - byte_size(BinR),
bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
_ ->
{nomatch, Cont2}
end;
true ->
Cont1
end
end.
bin_search_inv(<<>>, Cont, _) ->
{nomatch, Cont};
bin_search_inv(Bin, Cont, [Sep]) ->
bin_search_inv_1(Bin, Cont, Sep);
bin_search_inv(Bin, Cont, Seps) ->
bin_search_inv_n(Bin, Cont, Seps).
bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) ->
case BinRest of
<<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
case CP1 of
Sep -> bin_search_inv_1(BinRest, Cont, Sep);
_ -> [Bin0|Cont]
end;
_ when Cont =:= [] ->
case unicode_util:gc(Bin0) of
[Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep);
_ -> [Bin0|Cont]
end;
_ ->
case unicode_util:gc([Bin0|Cont]) of
[Sep|[Bin|Cont]] when is_binary(Bin) ->
bin_search_inv_1(Bin, Cont, Sep);
[Sep|Cs] ->
{nomatch, Cs};
_ -> [Bin0|Cont]
end
end;
bin_search_inv_1(<<>>, Cont, _Sep) ->
{nomatch, Cont};
bin_search_inv_1([], Cont, _Sep) ->
{nomatch, Cont}.
bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) ->
case BinRest of
<<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
case lists:member(CP1,Seps) of
true -> bin_search_inv_n(BinRest, Cont, Seps);
false -> [Bin0|Cont]
end;
_ when Cont =:= [] ->
[GC|Bin] = unicode_util:gc(Bin0),
case lists:member(GC, Seps) of
true -> bin_search_inv_n(Bin, Cont, Seps);
false -> [Bin0|Cont]
end;
_ ->
[GC|Cs0] = unicode_util:gc([Bin0|Cont]),
case lists:member(GC, Seps) of
false -> [Bin0|Cont];
true ->
case Cs0 of
[Bin|Cont] when is_binary(Bin) ->
bin_search_inv_n(Bin, Cont, Seps);
_ ->
{nomatch, Cs0}
end
end
end;
bin_search_inv_n(<<>>, Cont, _Sep) ->
{nomatch, Cont};
bin_search_inv_n([], Cont, _Sep) ->
{nomatch, Cont}.
bin_search_str(Bin0, Start, [], SearchCPs) ->
Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)),
bin_search_str_1(Bin0, Start, Compiled, SearchCPs);
bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
First = binary:compile_pattern(<<CP/utf8>>),
bin_search_str_2(Bin0, Start, Cont, First, SearchCPs).
bin_search_str_1(Bin0, Start, First, SearchCPs) ->
<<_:Start/binary, Bin/binary>> = Bin0,
case binary:match(Bin, First) of
nomatch -> {nomatch, byte_size(Bin0), []};
{Where0, _} ->
Where = Start+Where0,
<<Keep:Where/binary, Cs0/binary>> = Bin0,
case prefix_1(Cs0, SearchCPs) of
nomatch ->
<<_/utf8, Cs/binary>> = Cs0,
KeepSz = byte_size(Bin0) - byte_size(Cs),
bin_search_str_1(Bin0, KeepSz, First, SearchCPs);
[] ->
{Keep, [Cs0], <<>>};
Rest ->
{Keep, [Cs0], Rest}
end
end.
bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) ->
<<_:Start/binary, Bin/binary>> = Bin0,
case binary:match(Bin, First) of
nomatch -> {nomatch, byte_size(Bin0), Cont};
{Where0, _} ->
Where = Start+Where0,
<<Keep:Where/binary, Cs0/binary>> = Bin0,
[GC|Cs]=unicode_util:gc(Cs0),
case prefix_1(stack(Cs0,Cont), SearchCPs) of
nomatch when is_binary(Cs) ->
KeepSz = byte_size(Bin0) - byte_size(Cs),
bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs);
nomatch ->
{nomatch, Where, stack([GC|Cs],Cont)};
[] ->
{Keep, [Cs0|Cont], <<>>};
Rest ->
{Keep, [Cs0|Cont], Rest}
end
end.
%%---------------------------------------------------------------------------
%% OLD lists API kept for backwards compability
%%---------------------------------------------------------------------------
%% Robert's bit
%% len(String)
%% Return the length of a string.
-spec len(String) -> Length when
String :: string(),
Length :: non_neg_integer().
len(S) -> erlang:length(S).
%% equal(String1, String2)
%% Test if 2 strings are equal.
%% -spec equal(String1, String2) -> boolean() when
%% String1 :: string(),
%% String2 :: string().
%% equal(S, S) -> true;
%% equal(_, _) -> false.
%% concat(String1, String2)
%% Concatenate 2 strings.
-spec concat(String1, String2) -> String3 when
String1 :: string(),
String2 :: string(),
String3 :: string().
concat(S1, S2) -> S1 ++ S2.
%% chr(String, Char)
%% rchr(String, Char)
%% Return the first/last index of the character in a string.
-spec chr(String, Character) -> Index when
String :: string(),
Character :: char(),
Index :: non_neg_integer().
chr(S, C) when is_integer(C) -> chr(S, C, 1).
chr([C|_Cs], C, I) -> I;
chr([_|Cs], C, I) -> chr(Cs, C, I+1);
chr([], _C, _I) -> 0.
-spec rchr(String, Character) -> Index when
String :: string(),
Character :: char(),
Index :: non_neg_integer().
rchr(S, C) when is_integer(C) -> rchr(S, C, 1, 0).
rchr([C|Cs], C, I, _L) -> %Found one, now find next!
rchr(Cs, C, I+1, I);
rchr([_|Cs], C, I, L) ->
rchr(Cs, C, I+1, L);
rchr([], _C, _I, L) -> L.
%% str(String, SubString)
%% rstr(String, SubString)
%% index(String, SubString)
%% Return the first/last index of the sub-string in a string.
%% index/2 is kept for backwards compatibility.
-spec str(String, SubString) -> Index when
String :: string(),
SubString :: string(),
Index :: non_neg_integer().
str(S, Sub) when is_list(Sub) -> str(S, Sub, 1).
str([C|S], [C|Sub], I) ->
case l_prefix(Sub, S) of
true -> I;
false -> str(S, [C|Sub], I+1)
end;
str([_|S], Sub, I) -> str(S, Sub, I+1);
str([], _Sub, _I) -> 0.
-spec rstr(String, SubString) -> Index when
String :: string(),
SubString :: string(),
Index :: non_neg_integer().
rstr(S, Sub) when is_list(Sub) -> rstr(S, Sub, 1, 0).
rstr([C|S], [C|Sub], I, L) ->
case l_prefix(Sub, S) of
true -> rstr(S, [C|Sub], I+1, I);
false -> rstr(S, [C|Sub], I+1, L)
end;
rstr([_|S], Sub, I, L) -> rstr(S, Sub, I+1, L);
rstr([], _Sub, _I, L) -> L.
l_prefix([C|Pre], [C|String]) -> l_prefix(Pre, String);
l_prefix([], String) when is_list(String) -> true;
l_prefix(Pre, String) when is_list(Pre), is_list(String) -> false.
%% span(String, Chars) -> Length.
%% cspan(String, Chars) -> Length.
-spec span(String, Chars) -> Length when
String :: string(),
Chars :: string(),
Length :: non_neg_integer().
span(S, Cs) when is_list(Cs) -> span(S, Cs, 0).
span([C|S], Cs, I) ->
case member(C, Cs) of
true -> span(S, Cs, I+1);
false -> I
end;
span([], _Cs, I) -> I.
-spec cspan(String, Chars) -> Length when
String :: string(),
Chars :: string(),
Length :: non_neg_integer().
cspan(S, Cs) when is_list(Cs) -> cspan(S, Cs, 0).
cspan([C|S], Cs, I) ->
case member(C, Cs) of
true -> I;
false -> cspan(S, Cs, I+1)
end;
cspan([], _Cs, I) -> I.
%% substr(String, Start)
%% substr(String, Start, Length)
%% Extract a sub-string from String.
-spec substr(String, Start) -> SubString when
String :: string(),
SubString :: string(),
Start :: pos_integer().
substr(String, 1) when is_list(String) ->
String;
substr(String, S) when is_integer(S), S > 1 ->
substr2(String, S).
-spec substr(String, Start, Length) -> SubString when
String :: string(),
SubString :: string(),
Start :: pos_integer(),
Length :: non_neg_integer().
substr(String, S, L) when is_integer(S), S >= 1, is_integer(L), L >= 0 ->
substr1(substr2(String, S), L).
substr1([C|String], L) when L > 0 -> [C|substr1(String, L-1)];
substr1(String, _L) when is_list(String) -> []. %Be nice!
substr2(String, 1) when is_list(String) -> String;
substr2([_|String], S) -> substr2(String, S-1).
%% tokens(String, Seperators).
%% Return a list of tokens seperated by characters in Seperators.
-spec tokens(String, SeparatorList) -> Tokens when
String :: string(),
SeparatorList :: string(),
Tokens :: [Token :: nonempty_string()].
tokens(S, Seps) ->
case Seps of
[] ->
case S of
[] -> [];
[_|_] -> [S]
end;
[C] ->
tokens_single_1(lists:reverse(S), C, []);
[_|_] ->
tokens_multiple_1(lists:reverse(S), Seps, [])
end.
tokens_single_1([Sep|S], Sep, Toks) ->
tokens_single_1(S, Sep, Toks);
tokens_single_1([C|S], Sep, Toks) ->
tokens_single_2(S, Sep, Toks, [C]);
tokens_single_1([], _, Toks) ->
Toks.
tokens_single_2([Sep|S], Sep, Toks, Tok) ->
tokens_single_1(S, Sep, [Tok|Toks]);
tokens_single_2([C|S], Sep, Toks, Tok) ->
tokens_single_2(S, Sep, Toks, [C|Tok]);
tokens_single_2([], _Sep, Toks, Tok) ->
[Tok|Toks].
tokens_multiple_1([C|S], Seps, Toks) ->
case member(C, Seps) of
true -> tokens_multiple_1(S, Seps, Toks);
false -> tokens_multiple_2(S, Seps, Toks, [C])
end;
tokens_multiple_1([], _Seps, Toks) ->
Toks.
tokens_multiple_2([C|S], Seps, Toks, Tok) ->
case member(C, Seps) of
true -> tokens_multiple_1(S, Seps, [Tok|Toks]);
false -> tokens_multiple_2(S, Seps, Toks, [C|Tok])
end;
tokens_multiple_2([], _Seps, Toks, Tok) ->
[Tok|Toks].
-spec chars(Character, Number) -> String when
Character :: char(),
Number :: non_neg_integer(),
String :: string().
chars(C, N) -> chars(C, N, []).
-spec chars(Character, Number, Tail) -> String when
Character :: char(),
Number :: non_neg_integer(),
Tail :: string(),
String :: string().
chars(C, N, Tail) when N > 0 ->
chars(C, N-1, [C|Tail]);
chars(C, 0, Tail) when is_integer(C) ->
Tail.
%% Torbjörn's bit.
%%% COPIES %%%
-spec copies(String, Number) -> Copies when
String :: string(),
Copies :: string(),
Number :: non_neg_integer().
copies(CharList, Num) when is_list(CharList), is_integer(Num), Num >= 0 ->
copies(CharList, Num, []).
copies(_CharList, 0, R) ->
R;
copies(CharList, Num, R) ->
copies(CharList, Num-1, CharList++R).
%%% WORDS %%%
-spec words(String) -> Count when
String :: string(),
Count :: pos_integer().
words(String) -> words(String, $\s).
-spec words(String, Character) -> Count when
String :: string(),
Character :: char(),
Count :: pos_integer().
words(String, Char) when is_integer(Char) ->
w_count(strip(String, both, Char), Char, 0).
w_count([], _, Num) -> Num+1;
w_count([H|T], H, Num) -> w_count(strip(T, left, H), H, Num+1);
w_count([_H|T], Char, Num) -> w_count(T, Char, Num).
%%% SUB_WORDS %%%
-spec sub_word(String, Number) -> Word when
String :: string(),
Word :: string(),
Number :: integer().
sub_word(String, Index) -> sub_word(String, Index, $\s).
-spec sub_word(String, Number, Character) -> Word when
String :: string(),
Word :: string(),
Number :: integer(),
Character :: char().
sub_word(String, Index, Char) when is_integer(Index), is_integer(Char) ->
case words(String, Char) of
Num when Num < Index ->
[];
_Num ->
s_word(strip(String, left, Char), Index, Char, 1, [])
end.
s_word([], _, _, _,Res) -> lists:reverse(Res);
s_word([Char|_],Index,Char,Index,Res) -> lists:reverse(Res);
s_word([H|T],Index,Char,Index,Res) -> s_word(T,Index,Char,Index,[H|Res]);
s_word([Char|T],Stop,Char,Index,Res) when Index < Stop ->
s_word(strip(T,left,Char),Stop,Char,Index+1,Res);
s_word([_|T],Stop,Char,Index,Res) when Index < Stop ->
s_word(T,Stop,Char,Index,Res).
%%% STRIP %%%
-spec strip(string()) -> string().
strip(String) -> strip(String, both).
-spec strip(String, Direction) -> Stripped when
String :: string(),
Stripped :: string(),
Direction :: 'left' | 'right' | 'both'.
strip(String, left) -> strip_left(String, $\s);
strip(String, right) -> strip_right(String, $\s);
strip(String, both) ->
strip_right(strip_left(String, $\s), $\s).
-spec strip(String, Direction, Character) -> Stripped when
String :: string(),
Stripped :: string(),
Direction :: 'left' | 'right' | 'both',
Character :: char().
strip(String, right, Char) -> strip_right(String, Char);
strip(String, left, Char) -> strip_left(String, Char);
strip(String, both, Char) ->
strip_right(strip_left(String, Char), Char).
strip_left([Sc|S], Sc) ->
strip_left(S, Sc);
strip_left([_|_]=S, Sc) when is_integer(Sc) -> S;
strip_left([], Sc) when is_integer(Sc) -> [].
strip_right([Sc|S], Sc) ->
case strip_right(S, Sc) of
[] -> [];
T -> [Sc|T]
end;
strip_right([C|S], Sc) ->
[C|strip_right(S, Sc)];
strip_right([], Sc) when is_integer(Sc) ->
[].
%%% LEFT %%%
-spec left(String, Number) -> Left when
String :: string(),
Left :: string(),
Number :: non_neg_integer().
left(String, Len) when is_integer(Len) -> left(String, Len, $\s).
-spec left(String, Number, Character) -> Left when
String :: string(),
Left :: string(),
Number :: non_neg_integer(),
Character :: char().
left(String, Len, Char) when is_integer(Char) ->
Slen = erlang:length(String),
if
Slen > Len -> substr(String, 1, Len);
Slen < Len -> l_pad(String, Len-Slen, Char);
Slen =:= Len -> String
end.
l_pad(String, Num, Char) -> String ++ chars(Char, Num).
%%% RIGHT %%%
-spec right(String, Number) -> Right when
String :: string(),
Right :: string(),
Number :: non_neg_integer().
right(String, Len) when is_integer(Len) -> right(String, Len, $\s).
-spec right(String, Number, Character) -> Right when
String :: string(),
Right :: string(),
Number :: non_neg_integer(),
Character :: char().
right(String, Len, Char) when is_integer(Char) ->
Slen = erlang:length(String),
if
Slen > Len -> substr(String, Slen-Len+1);
Slen < Len -> r_pad(String, Len-Slen, Char);
Slen =:= Len -> String
end.
r_pad(String, Num, Char) -> chars(Char, Num, String).
%%% CENTRE %%%
-spec centre(String, Number) -> Centered when
String :: string(),
Centered :: string(),
Number :: non_neg_integer().
centre(String, Len) when is_integer(Len) -> centre(String, Len, $\s).
-spec centre(String, Number, Character) -> Centered when
String :: string(),
Centered :: string(),
Number :: non_neg_integer(),
Character :: char().
centre(String, 0, Char) when is_list(String), is_integer(Char) ->
[]; % Strange cases to centre string
centre(String, Len, Char) when is_integer(Char) ->
Slen = erlang:length(String),
if
Slen > Len -> substr(String, (Slen-Len) div 2 + 1, Len);
Slen < Len ->
N = (Len-Slen) div 2,
r_pad(l_pad(String, Len-(Slen+N), Char), N, Char);
Slen =:= Len -> String
end.
%%% SUB_STRING %%%
-spec sub_string(String, Start) -> SubString when
String :: string(),
SubString :: string(),
Start :: pos_integer().
sub_string(String, Start) -> substr(String, Start).
-spec sub_string(String, Start, Stop) -> SubString when
String :: string(),
SubString :: string(),
Start :: pos_integer(),
Stop :: pos_integer().
sub_string(String, Start, Stop) -> substr(String, Start, Stop - Start + 1).
%% ISO/IEC 8859-1 (latin1) letters are converted, others are ignored
%%
to_lower_char(C) when is_integer(C), $A =< C, C =< $Z ->
C + 32;
to_lower_char(C) when is_integer(C), 16#C0 =< C, C =< 16#D6 ->
C + 32;
to_lower_char(C) when is_integer(C), 16#D8 =< C, C =< 16#DE ->
C + 32;
to_lower_char(C) ->
C.
to_upper_char(C) when is_integer(C), $a =< C, C =< $z ->
C - 32;
to_upper_char(C) when is_integer(C), 16#E0 =< C, C =< 16#F6 ->
C - 32;
to_upper_char(C) when is_integer(C), 16#F8 =< C, C =< 16#FE ->
C - 32;
to_upper_char(C) ->
C.
-spec to_lower(String) -> Result when
String :: io_lib:latin1_string(),
Result :: io_lib:latin1_string()
; (Char) -> CharResult when
Char :: char(),
CharResult :: char().
to_lower(S) when is_list(S) ->
[to_lower_char(C) || C <- S];
to_lower(C) when is_integer(C) ->
to_lower_char(C).
-spec to_upper(String) -> Result when
String :: io_lib:latin1_string(),
Result :: io_lib:latin1_string()
; (Char) -> CharResult when
Char :: char(),
CharResult :: char().
to_upper(S) when is_list(S) ->
[to_upper_char(C) || C <- S];
to_upper(C) when is_integer(C) ->
to_upper_char(C).
-spec join(StringList, Separator) -> String when
StringList :: [string()],
Separator :: string(),
String :: string().
join([], Sep) when is_list(Sep) ->
[];
join([H|T], Sep) ->
H ++ lists:append([Sep ++ X || X <- T]).