From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- system/doc/efficiency_guide/listhandling.xml | 241 +++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 system/doc/efficiency_guide/listhandling.xml (limited to 'system/doc/efficiency_guide/listhandling.xml') diff --git a/system/doc/efficiency_guide/listhandling.xml b/system/doc/efficiency_guide/listhandling.xml new file mode 100644 index 0000000000..e9d2dfe556 --- /dev/null +++ b/system/doc/efficiency_guide/listhandling.xml @@ -0,0 +1,241 @@ + + + + +
+ + 20012009 + Ericsson AB. All Rights Reserved. + + + The contents of this file are subject to the Erlang Public License, + Version 1.1, (the "License"); you may not use this file except in + compliance with the License. You should have received a copy of the + Erlang Public License along with this software. If not, it can be + retrieved online at http://www.erlang.org/. + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + the License for the specific language governing rights and limitations + under the License. + + + + List handling + Bjorn Gustavsson + + 2007-11-16 + + listHandling.xml +
+ +
+ Creating a list + +

Lists can only be built starting from the end and attaching + list elements at the beginning. If you use the ++ operator + like this

+ + +List1 ++ List2 + +

you will create a new list which is a copy of the elements in List1, + followed by List2. Looking at how lists:append/2 or ++ would be + implemented in plain Erlang, it can be seen clearly that the first list + is copied:

+ + +append([H|T], Tail) -> + [H|append(T, Tail)]; +append([], Tail) -> + Tail. + +

So the important thing when recursing and building a list is to + make sure that you attach the new elements to the beginning of the list, + so that you build a list, and not hundreds or thousands of + copies of the growing result list.

+ +

Let us first look at how it should not be done:

+ +

DO NOT

+ + +bad_fib(N) -> + bad_fib(N, 0, 1, []). + +bad_fib(0, _Current, _Next, Fibs) -> + Fibs; +bad_fib(N, Current, Next, Fibs) -> + bad_fib(N - 1, Next, Current + Next, Fibs ++ [Current]). + +

Here we are not building a single list; in each iteration step we + create a new list that is one element longer than the previous list.

+ +

To avoid copying the result in each iteration, we must build the list in + reverse order and reverse the list when we are done:

+ +

DO

+ + +tail_recursive_fib(N) -> + tail_recursive_fib(N, 0, 1, []). + +tail_recursive_fib(0, _Current, _Next, Fibs) -> + lists:reverse(Fibs); +tail_recursive_fib(N, Current, Next, Fibs) -> + tail_recursive_fib(N - 1, Next, Current + Next, [Current|Fibs]). + +
+ +
+ List comprehensions + +

List comprehensions still have a reputation for being slow. + They used to be implemented using funs, which used to be slow.

+ +

In recent Erlang/OTP releases (including R12B), a list comprehension

+ + +[Expr(E) || E <- List] + +

is basically translated to a local function

+ + +'lc^0'([E|Tail], Expr) -> + [Expr(E)|'lc^0'(Tail, Expr)]; +'lc^0'([], _Expr) -> []. + +

In R12B, if the result of the list comprehension will obviously not be used, + a list will not be constructed. For instance, in this code

+ + +[io:put_chars(E) || E <- List], +ok. + +

or in this code

+ + +case Var of + ... -> + [io:put_chars(E) || E <- List]; + ... -> +end, +some_function(...), +. +. +. + +

the value is neither assigned to a variable, nor passed to another function, + nor returned, so there is no need to construct a list and the compiler will simplify + the code for the list comprehension to

+ + +'lc^0'([E|Tail], Expr) -> + Expr(E), + 'lc^0'(Tail, Expr); +'lc^0'([], _Expr) -> []. + +
+ +
+ Deep and flat lists + +

lists:flatten/1 + builds an entirely new list. Therefore, it is expensive, and even + more expensive than the ++ operator (which copies its left argument, + but not its right argument).

+ +

In the following situations, you can easily avoid calling lists:flatten/1:

+ + + When sending data to a port. Ports understand deep lists + so there is no reason to flatten the list before sending it to + the port. + When calling BIFs that accept deep lists, such as + list_to_binary/1 or + iolist_to_binary/1. + When you know that your list is only one level deep, you can use + lists:append/1. + + +

Port example

+

DO

+
+      ...
+      port_command(Port, DeepList)
+      ...
+

DO NOT

+
+      ...
+      port_command(Port, lists:flatten(DeepList))
+      ...
+ +

A common way to send a zero-terminated string to a port is the following:

+ +

DO NOT

+
+      ...
+      TerminatedStr = String ++ [0], % String="foo" => [$f, $o, $o, 0]
+      port_command(Port, TerminatedStr)
+      ...
+ +

Instead do like this:

+ +

DO

+
+      ...
+      TerminatedStr = [String, 0], % String="foo" => [[$f, $o, $o], 0]
+      port_command(Port, TerminatedStr) 
+      ...
+ +

Append example

+

DO

+
+      > lists:append([[1], [2], [3]]).
+      [1,2,3]
+      >
+

DO NOT

+
+      > lists:flatten([[1], [2], [3]]).
+      [1,2,3]
+      >
+
+ +
+ Why you should not worry about recursive list functions + +

In the performance myth chapter, the following myth was exposed: + Tail-recursive functions + are MUCH faster than recursive functions.

+ +

To summarize, in R12B there is usually not much difference between + a body-recursive list function and a tail-recursive function that reverses + the list at the end. Therefore, concentrate on writing beautiful code + and forget about the performance of your list functions. In the time-critical + parts of your code (and only there), measure before rewriting + your code.

+ +

Important note: This section talks about list functions that + construct lists. A tail-recursive function that does not construct + a list runs in constant space, while the corresponding body-recursive + function uses stack space proportional to the length of the list. + For instance, a function that sums a list of integers should not be + written like this

+ +

DO NOT

+ +recursive_sum([H|T]) -> H+recursive_sum(T); +recursive_sum([]) -> 0. + +

but like this

+ +

DO

+ +sum(L) -> sum(L, 0). + +sum([H|T], Sum) -> sum(T, Sum + H); +sum([], Sum) -> Sum. +
+
+ -- cgit v1.2.3