From 2c72e662bad11a41839780f86680d4bb05367c78 Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson This module provides functions for string processing. A string in this module is represented by
+ This module operates on grapheme clusters. A grapheme cluster
+ is a user-perceived character, which can be represented by several
+ codepoints.
+
+ The string length of "ß↑e̊" is 3, even though it is represented by the
+ codepoints [223,8593,101,778] or the UTF-8 binary encoding
+ <<195,159,226,134,145,101,204,138>>.
+ Grapheme clusters for codepoints of class
+ Splitting and appending strings is to be done on grapheme cluster
+ borders.
+ There is no verification that the results of appending strings are
+ valid or normalized.
+
+ Most of the functions expect all input to be normalized to one form,
+ see for example
+ Language- or locale-specific handling of input is not considered
+ in any function.
+
+ The functions can crash for invalid input strings. For example,
+ the functions expect UTF-8 binaries, but not all functions
+ verify that all binaries are encoded correctly.
+
+ Unless otherwise specified the return value type is the same as
+ the input type. That is, binary input returns binary output,
+ list input returns a list output, and mixed input can return a
+ mixed output. This module has been reworked in Erlang/OTP 20 to
+ handle A user-perceived character, consisting of one or more
+ codepoints.
+ Converts Example:
+ Returns a string where any trailing Example:
+ Returns
+ If If By default,
+ Example:
+ Removes anything before
+ By default, Example: Returns Example:
+ Returns the number of grapheme clusters in Example:
+ Returns a list of lexemes in
+ Notice that, as shown in this example, two or more
+ adjacent separator grapheme clusters in Notice that Example:
+ Converts
+ Notice that function Example:
+ Returns the first codepoint in Example:
+ Returns the first grapheme cluster in Example: Returns lexeme number Example:
+ Pads By default, Example:
+ If Example:
+ Replaces Can be implemented as: Example:
+ Returns the reverse list of the grapheme clusters in Example: Returns a substring of By default, Example:
+ Splits Example: Takes characters from Example:
+ Converts Example: Argument Example: Argument Example:
+ Converts Example:
+ Returns a string, where leading or trailing, or both,
+ Default
+ Notice that Example:
+ Converts See also Example: Here follow the functions of the old API.
+ These functions only work on a list of Latin-1 characters.
+
+ The functions are kept for backward compatibility, but are
+ not recommended.
+ They will be deprecated in Erlang/OTP 21.
+ Any undocumented functions in Returns a string, where This function is Returns a string consisting of This function is Returns the index of the first occurrence of
This function is Concatenates
+ This function is Returns a string containing This function is Returns the length of the maximum initial segment of
This function is Example: Returns Returns a string with the elements of This function is Example:
+"abcd" is a valid string
+<<"abcd">> is a valid string
+["abcd"] is a valid string
+<<"abc..åäö"/utf8>> is a valid string
+<<"abc..åäö">> is NOT a valid string,
+ but a binary with Latin-1-encoded codepoints
+[<<"abc">>, "..åäö"] is a valid string
+[atom] is NOT a valid string
+
+"å" [229] or [97, 778]
+"e̊" [101, 778]
+
+1> string:trim(" sarah ").
+"sarah"
+2> string:trim(<<" sarah ">>).
+<<"sarah">>
+3> string:lexemes("foo bar", " ").
+["foo","bar"]
+4> string:lexemes(<<"foo bar">>, " ").
+[<<"foo">>,<<"bar">>]
+
+1> string:casefold("Ω and ẞ SHARP S").
+"ω and ss sharp s"
+
+182> string:chomp(<<"\nHello\n\n">>).
+<<"\nHello">>
+183> string:chomp("\nHello\r\r\n").
+"\nHello\r"
+
+1> string:equal("åäö", <<"åäö"/utf8>>).
+true
+2> string:equal("åäö", unicode:characters_to_nfd_binary("åäö")).
+false
+3> string:equal("åäö", unicode:characters_to_nfd_binary("ÅÄÖ"), true, nfc).
+true
+
+1> string:find("ab..cd..ef", ".").
+"..cd..ef"
+2> string:find(<<"ab..cd..ef">>, "..", trailing).
+<<"..ef">>
+3> string:find(<<"ab..cd..ef">>, "x", leading).
+nomatch
+4> string:find("ab..cd..ef", "x", trailing).
+nomatch
+
+1> string:is_empty("foo").
+false
+2> string:is_empty(["",<<>>]).
+true
+
+1> string:length("ß↑e̊").
+3
+2> string:length(<<195,159,226,134,145,101,204,138>>).
+3
+
+1> string:lexemes("abc de̊fxxghix jkl\r\nfoo", "x e" ++ [[$\r,$\n]]).
+["abc","de̊f","ghi","jkl","foo"]
+2> string:lexemes(<<"abc de̊fxxghix jkl\r\nfoo"/utf8>>, "x e" ++ [$\r,$\n]).
+[<<"abc">>,<<"de̊f"/utf8>>,<<"ghi">>,<<"jkl\r\nfoo">>]
+
+2> string:lowercase(string:uppercase("Michał")).
+"michał"
+
+1> string:next_codepoint(unicode:characters_to_binary("e̊fg")).
+[101|<<"̊fg"/utf8>>]
+
+1> string:next_grapheme(unicode:characters_to_binary("e̊fg")).
+["e̊"|<<"fg">>]
+
+1> string:nth_lexeme("abc.de̊f.ghiejkl", 3, ".e").
+"ghi"
+
+1> string:pad(<<"He̊llö"/utf8>>, 8).
+[<<72,101,204,138,108,108,195,182>>,32,32,32]
+2> io:format("'~ts'~n",[string:pad("He̊llö", 8, leading)]).
+' He̊llö'
+3> io:format("'~ts'~n",[string:pad("He̊llö", 8, both)]).
+' He̊llö '
+
+1> string:prefix(<<"prefix of string">>, "pre").
+<<"fix of string">>
+2> string:prefix("pre", "prefix").
+nomatch
+ lists:join(Replacement, split(String, SearchPattern, Where)).
+
+1> string:replace(<<"ab..cd..ef">>, "..", "*").
+[<<"ab">>,"*",<<"cd..ef">>]
+2> string:replace(<<"ab..cd..ef">>, "..", "*", all).
+[<<"ab">>,"*",<<"cd">>,"*",<<"ef">>]
+
+1> Reverse = string:reverse(unicode:characters_to_nfd_binary("ÅÄÖ")).
+[[79,776],[65,776],[65,778]]
+2> io:format("~ts~n",[Reverse]).
+ÖÄÅ
+
+1> string:slice(<<"He̊llö Wörld"/utf8>>, 4).
+<<"ö Wörld"/utf8>>
+2> string:slice(["He̊llö ", <<"Wörld"/utf8>>], 4,4).
+"ö Wö"
+3> string:slice(["He̊llö ", <<"Wörld"/utf8>>], 4,50).
+"ö Wörld"
+
+0> string:split("ab..bc..cd", "..").
+["ab","bc..cd"]
+1> string:split(<<"ab..bc..cd">>, "..", trailing).
+[<<"ab..bc">>,<<"cd">>]
+2> string:split(<<"ab..bc....cd">>, "..", all).
+[<<"ab">>,<<"bc">>,<<>>,<<"cd">>]
+
+5> string:take("abc0z123", lists:seq($a,$z)).
+{"abc","0z123"}
+6> string:take(<<"abc0z123">>, lists:seq($0,$9), true, leading).
+{<<"abc">>,<<"0z123">>}
+7> string:take("abc0z123", lists:seq($0,$9), false, trailing).
+{"abc0z","123"}
+8> string:take(<<"abc0z123">>, lists:seq($a,$z), true, trailing).
+{<<"abc0z">>,<<"123">>}
+
+1> string:titlecase("ß is a SHARP s").
+"Ss is a SHARP s"
+
+> {F1,Fs} = string:to_float("1.0-1.0e-1"),
+> {F2,[]} = string:to_float(Fs),
+> F1+F2.
+0.9
+> string:to_float("3/2=1.5").
+{error,no_float}
+> string:to_float("-1.5eX").
+{-1.5,"eX"}
+
+> {I1,Is} = string:to_integer("33+22"),
+> {I2,[]} = string:to_integer(Is),
+> I1-I2.
+11
+> string:to_integer("0.5").
+{0,".5"}
+> string:to_integer("x=2").
+{error,no_integer}
+
+1> string:to_graphemes("ß↑e̊").
+[223,8593,[101,778]]
+2> string:to_graphemes(<<"ß↑e̊"/utf8>>).
+[223,8593,[101,778]]
+
+1> string:trim("\t Hello \n").
+"Hello"
+2> string:trim(<<"\t Hello \n">>, leading).
+<<"Hello \n">>
+3> string:trim(<<".Hello.\n">>, trailing, "\n.").
+<<".Hello">>
+
+1> string:uppercase("Michał").
+"MICHAŁ"
+
> string:cspan("\t abcdef", " \t").
@@ -105,21 +736,15 @@
-
> join(["one", "two", "three"], ", ").
@@ -137,6 +762,10 @@
fixed. If
This function is
Example:
> string:left("Hello",10,$.).
@@ -149,6 +778,9 @@
Return the length of a string.
Returns the number of characters in String .
+ This function is obsolete .
+ Use
+ length/1 .
@@ -160,6 +792,9 @@
Returns the index of the last occurrence of
Character in String . Returns
0 if Character does not occur.
+ This function is obsolete .
+ Use
+ find/3 .
@@ -173,6 +808,9 @@
fixed. If the length of (String ) <
Number , then String is padded
with blanks or Character s.
+ This function is obsolete .
+ Use
+ pad/3 .
Example:
> string:right("Hello", 10, $.).
@@ -188,6 +826,9 @@
SubString begins in String .
Returns 0 if SubString
does not exist in String .
+ This function is obsolete .
+ Use
+ find/3 .
Example:
> string:rstr(" Hello Hello World World ", "Hello World").
@@ -202,6 +843,9 @@
Returns the length of the maximum initial segment of
String , which consists entirely of characters
from Chars .
+ This function is obsolete .
+ Use
+ take/2 .
Example:
> string:span("\t abcdef", " \t").
@@ -217,6 +861,9 @@
SubString begins in String .
Returns 0 if SubString
does not exist in String .
+ This function is obsolete .
+ Use
+ find/2 .
Example:
> string:str(" Hello Hello World World ", "Hello World").
@@ -230,12 +877,15 @@
Strip leading or trailing characters.
- Returns a string, where leading and/or trailing blanks or a
+
Returns a string, where leading or trailing, or both, blanks or a
number of Character have been removed.
Direction , which can be left , right ,
or both , indicates from which direction blanks are to be
removed. strip/1 is equivalent to
strip(String, both) .
+ This function is obsolete .
+ Use
+ trim/3 .
Example:
> string:strip("...Hello.....", both, $.).
@@ -251,6 +901,9 @@
Returns a substring of String , starting at
position Start to the end of the string, or to
and including position Stop .
+ This function is obsolete .
+ Use
+ slice/3 .
Example:
sub_string("Hello World", 4, 8).
@@ -266,6 +919,9 @@ sub_string("Hello World", 4, 8).
Returns a substring of String , starting at
position Start , and ending at the end of the
string or at length Length .
+ This function is obsolete .
+ Use
+ slice/3 .
Example:
> substr("Hello World", 4, 5).
@@ -281,6 +937,9 @@ sub_string("Hello World", 4, 8).
Returns the word in position Number of
String . Words are separated by blanks or
Character s.
+ This function is obsolete .
+ Use
+ nth_lexeme/3 .
Example:
> string:sub_word(" Hello old boy !",3,$o).
@@ -288,50 +947,6 @@ sub_string("Hello World", 4, 8).
-
-
- Returns a float whose text representation is the integers
- (ASCII values) in a string.
-
- Argument String is expected to start with a
- valid text represented float (the digits are ASCII values).
- Remaining characters in the string after the float are returned in
- Rest .
- Example:
-
-> {F1,Fs} = string:to_float("1.0-1.0e-1"),
-> {F2,[]} = string:to_float(Fs),
-> F1+F2.
-0.9
-> string:to_float("3/2=1.5").
-{error,no_float}
-> string:to_float("-1.5eX").
-{-1.5,"eX"}
-
-
-
-
-
- Returns an integer whose text representation is the integers
- (ASCII values) in a string.
-
- Argument String is expected to start with a
- valid text represented integer (the digits are ASCII values).
- Remaining characters in the string after the integer are returned in
- Rest .
- Example:
-
-> {I1,Is} = string:to_integer("33+22"),
-> {I2,[]} = string:to_integer(Is),
-> I1-I2.
-11
-> string:to_integer("0.5").
-{0,".5"}
-> string:to_integer("x=2").
-{error,no_integer}
-
-
-
@@ -346,6 +961,11 @@ sub_string("Hello World", 4, 8).
The specified string or character is case-converted. Notice that
the supported character set is ISO/IEC 8859-1 (also called Latin 1);
all values outside this set are unchanged
+ This function is obsolete. Use
+ lowercase/1 ,
+ uppercase/1 ,
+ titlecase/1 or
+ casefold/1 .
@@ -363,6 +983,9 @@ sub_string("Hello World", 4, 8).
adjacent separator characters in String
are treated as one. That is, there are no empty
strings in the resulting list of tokens.
+ This function is obsolete .
+ Use
+ lexemes/2 .
@@ -373,6 +996,9 @@ sub_string("Hello World", 4, 8).
Returns the number of words in String , separated
by blanks or Character .
+ This function is obsolete .
+ Use
+ lexemes/2 .
Example:
> words(" Hello old boy!", $o).
@@ -387,10 +1013,7 @@ sub_string("Hello World", 4, 8).
other. The reason is that this string package is the
combination of two earlier packages and all functions of
both packages have been retained.
-
-
- Any undocumented functions in string are not to be used.
-
+
--
cgit v1.2.3