Introduciton to Erlang : Recursion (2/2)

This entry is part 10 of 16 in the series Introduction to Erlang

Accumulators

In several cases, as with the mlists:length/1 example, the non-tail recursive function can be easily turned to a tail recursive one by using the notion of accumulator. An accumulator is an extra argument introduced to a function in order to aggregate the partial results of the function. It turns the “bottom-up” collection of the final result to “top-down”.

In order to add and initialize the accumulator argument one has to introduce an extra function definition.

tlr(...) ->
    tlr(..., Accumulator_initial_value).
 
% the clause that "breaks" the recursion and
% returns the result
tlr(..., Accumulator) -> 
    Accumulator;
tlr(..., Accumulator) ->
    ...,
    Accumulator_new_value = ...,
    ...,
    trl(..., Accumulator_new_value).

Notice that typically you would only export the tlr/1 function and the tlr/2 would remain for inner-module use and not visible to the module’s users.

Examples

mlists:reverse/1
% a benchmark function in order to write
% one argument less :-)
bm(Fun, Args) ->
    % the ?MODULE can be used instead of the
    % name of the module
    timer:tc(?MODULE, Fun, Args).
 
% non tail recursive
reverse([]) ->
    [];
reverse([H | T]) ->
    reverse(T) ++ [H].
 
% tail recursive
% create an extra help function that will include
% the accumulator as an extra argument
reverse_tr(List) ->
    % initialize the accumulator to the proper value
    reverse_tr(List, []).
 
% the help function
% when finished, return the Accumulator
reverse_tr([], Acc) ->
    Acc;
reverse_tr([H | T], Acc) ->
    reverse_tr(T, [H | Acc]).

Lets benchmark it!

1> L = lists:seq(1, 1000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
2> mlists:bm(reverse, [L]).
{33519,
 [1000,999,998,997,996,995,994,993,992,991,990,989,988,987,
  986,985,984,983,982,981,980,979,978,977,976,975,974|...]}
3> mlists:bm(reverse_tr, [L]).
{64,
 [1000,999,998,997,996,995,994,993,992,991,990,989,988,987,
  986,985,984,983,982,981,980,979,978,977,976,975,974|...]}
4> L1 = lists:seq(1, 100000). 
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
5> mlists:bm(reverse, [L1]).  
{114642276,
 [100000,99999,99998,99997,99996,99995,99994,99993,99992,
  99991,99990,99989,99988,99987,99986,99985,99984,99983,99982,
  99981,99980,99979,99978,99977,99976,99975,99974|...]}
6> mlists:bm(reverse_tr, [L1]).
{3450,
 [100000,99999,99998,99997,99996,99995,99994,99993,99992,
  99991,99990,99989,99988,99987,99986,99985,99984,99983,99982,
  99981,99980,99979,99978,99977,99976,99975,99974|...]}

In this case you can notice the great performance difference between the tail and non tail recursive solutions.

mlists:seq/2

You have seen me using the lists:seq/2 function frequently. A call to lists:seq(Argument1, Argument2) is used to generate a list that consists of the integers from the sequence Argument1, Argument1 + 1, Argument1 + 2, .., Argument2.

% non tail recursive
seq(L, H) when L > H ->
    [];
seq(L, H) ->
    [L | seq(L + 1, H)].
 
% tail recursive
seq_tr(L, H) ->
    seq_tr(L, H, []).
 
seq_tr(L, H, Accumulator) when L > H ->
    Accumulator;
seq_tr(L, H, Accumulator) ->
    seq_tr(L, H - 1, [H | Accumulator]).
 
% benchmarking
2> mlists:bm(seq, [1, 1000]).
{187,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
3> mlists:bm(seq_tr, [1, 1000]).
{160,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
4> mlists:bm(seq, [1, 10000]).  
{3799,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
5> mlists:bm(seq_tr, [1, 10000]).
{2676,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
6> mlists:bm(seq, [1, 100000]).  
{23706,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
7> mlists:bm(seq_tr, [1, 100000]).
{15528,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
mlists:sum/1
% non tail recursive
sum([]) ->
    0;
sum([H | T]) when is_number(H) ->
    H + sum(T).
 
% tail recursive
sum_tr(List) ->
    sum_tr(List, 0).
 
sum_tr([], Sum) ->
    Sum;
sum_tr([H | T], Sum) when is_number(H) ->
    sum_tr(T, H + Sum).
 
% benchmarking
1> L = mlists:seq_tr(1, 10000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
2> mlists:bm(sum, [L]).
{2387,50005000}
3> mlists:bm(sum_tr, [L]).
{803,50005000}
4> L1 = mlists:seq_tr(1, 100000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
5> mlists:bm(sum, [L1]).         
{33556,5000050000}
6> mlists:bm(sum_tr, [L1]).
{26847,5000050000}
mlists:average/1
% solution using non tail recursive functions
average([]) ->
    0;
average(List) ->
    sum(List) / mlists:length(List).
 
% solution using tail recursive functions
average_tr([]) ->
    0;
average_tr(List) ->
    sum_tr(List) / length_tr(List).
 
% optimized tail recursive solution
average_tr1(List) ->
    average_tr1(List, 0, 0).
 
average_tr1([], Sum, Length) ->
    Sum / Length;
average_tr1([H | T], Sum, Length) ->
    average_tr1(T, Sum + H, Length + 1).
 
% benchmarking
2> L = mlists:seq_tr(1, 100000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
3> mlists:bm(average, [L]).
{40654,50000.5}
4> mlists:bm(average_tr, [L]).
{28388,50000.5}
5> mlists:bm(average_tr1, [L]).
{24258,50000.5}
fib/1

You can find a detailed post about Fibonacci sequence here. I duplicate some results here in order to show you that, some times, converting the function to tail recursive is the only way.

% non tail recursive
fib(0) ->
    0;
fib(1) ->
    1;
fib(N) ->
    fib(N-1) + fib(N-2).
 
% tail recursive
fib_tr(0) ->
    0;
fib_tr(1) ->
    1;
fib_tr(N) ->
    fib_tr(0, 1, 2, N).
 
fib_tr(L, H, C, N) when C >= N ->
    L + H;
fib_tr(L, H, C, N) ->
    fib_tr(H, L + H, C + 1, N).
 
% benchmarking
2> timer:tc(fib, fib, [20]).
{4252,6765}
3> timer:tc(fib, fib_tr, [20]).
{7,6765}
4> timer:tc(fib, fib, [25]).   
{19440,75025}
5> timer:tc(fib, fib_tr, [25]).
{7,75025}
6> timer:tc(fib, fib_tr, [2500]).
{1925,
 1317090516751949629522763087125316412066606964992507141887746936727530870405038425764503130123186407746570862185871925952766836352119119528156315582632460790383834605654880612657718465632568839245978248473058179422070735553124716385450886640552392273856770672239797164264356927661308349671941673643205733343592701716715788255170679575500279186053316365583259186927359351023387298371686222860827415371443553759953659514120882763808142593366402472251348360008915585215291504984371697523871199553935714056959634778700594751875
}

In this case, the difference is so huge because, apart from the lack of tail recursion, the first solution has repetition of computations. What I mean is that in order to calculate fib(5), for example, fib(3) will be calculated twice (once for fib(5) = fib(4) + fib(3) and once for fib(4) = fib(3) + fib(2)).

General Recursion Examples

In the following examples I will not do conversion from non tail recursive to tail recursive.

factorial/1

factorial(0) ->
    1;
factorial(N) ->
    N * factorial(N - 1).

mlists:append/2

A call to append(List1, List2) appends the elements from List1 to the beginning of List2.

append([], List) ->
    List;
append([H | T], List) ->
    [H | append(T, List)].
 
% examples
1> mlists:append([], [7]).
[7]
2> mlists:append([1], [7, 11, 13]). 
[1,7,11,13]
3> mlists:append([1, 2, 3], [7, 11, 13]).
[1,2,3,7,11,13]

mlists:all/2

all(Pred, List) returns true if all items belonging to the the List return true for the predicate Pred.

all(_, []) ->
    true;
all(Pred, [H | T]) ->
    case Pred(H) of
	true ->
	    all(Pred, T);
	_ ->
	    false
    end.
 
% example
1> L = [1, 2, 3, 4, 5].      
[1,2,3,4,5]
2> mlists:all(fun(X) -> X < 6 end, L).
true
3> mlists:all(fun(X) -> X < 4 end, L).
false

mlists:delete/2

delete(Elem, List) returns the List without the first occurrence of element Elem (if any).

delete(_, []) ->
    [];
delete(Elem, [Elem | T]) ->
    T;
delete(Elem, [_Elem | T]) ->
    delete(Elem, T).

mlists:flatten/1

flatten(List) returns the flattened version of the List.

flatten([]) ->
    [];
flatten([[H | T] | TT]) ->
    flatten([H | T ++ TT]);
flatten([[] | T]) ->
    flatten(T);
flatten([H | T]) ->
    [H | flatten(T)].
 
% examples
1> mlists:flatten([1,2,3]).
[1,2,3]
2> mlists:flatten([1,[2],3]).
[1,2,3]
3> mlists:flatten([1,[2, a, [b, [c 1="d," 2="e" language=","][/c]]], 3, [1, {2, [3, 6]}]]).
[1,2,a,b,c,d,e,3,1,{2,[3,6]}]

Sorting

mlists:mergesort/1
merge([], []) ->
    [];
merge(List, []) ->
    List;
merge([], List) ->
    List;
merge([H1 | T1], L2 = [H2 | _]) when H1 < H2 ->
    [H1 | merge(T1, L2)];
merge(L1, [H2 | T2]) ->
    [H2 | merge(L1, T2)].
 
split(N, List) ->
    split(N, List, []).
 
split(0, List, List2) ->
    {reverse_tr(List2), List};
split(N, [H | T], List2) ->
    split(N - 1, T, [H | List2]).
 
mergesort([]) ->
    [];
mergesort([I]) ->
    [I];
mergesort(List) ->
    Middle = length_tr(List) div 2,
    {L1, L2} = split(Middle, List),
    merge(mergesort(L1), mergesort(L2)).
mlists:quicksort
split_pivot([]) ->
    [];
split_pivot([H | T]) ->
    split_pivot(H, T, [], []).
 
split_pivot(Pivot, [], Lower, Higher) ->
    {Lower, Pivot, Higher};
split_pivot(Pivot, [H | T], Lower, Higher) when H < Pivot ->
    split_pivot(Pivot, T, [H | Lower], Higher);
split_pivot(Pivot, [H | T], Lower, Higher) ->
    split_pivot(Pivot, T, Lower, [H | Higher]).
 
quicksort([]) ->
    [];
quicksort(List = [_ | _]) ->
    {Lower, Pivot, Higher} = split_pivot(List),
    append(quicksort(Lower), [Pivot | quicksort(Higher)]).
Benchmarking Merge/Quick-sort

Worst-case performance for quicksort is O(n2), while for mergesort is O(n * log(n)).

1> L = mlists:reverse_tr(mlists:seq_tr(1, 1000)).
[1000,999,998,997,996,995,994,993,992,991,990,989,988,987,
 986,985,984,983,982,981,980,979,978,977,976,975,974,973,972|...]
2> mlists:bm(mergesort, [L]).
{5675,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
3> mlists:bm(quicksort, [L]).
{65940,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
4> L1 = mlists:reverse_tr(mlists:seq_tr(1, 10000)).
[10000,9999,9998,9997,9996,9995,9994,9993,9992,9991,9990,
 9989,9988,9987,9986,9985,9984,9983,9982,9981,9980,9979,9978,
 9977,9976,9975,9974,9973,9972|...]
5> mlists:bm(mergesort, [L1]).                     
{48904,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
6> mlists:bm(quicksort, [L1]).                     
{6045564,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}

Excercises

Try to play with the following examples. They are all simple!

  1. Write the mlists:max/1 and mlists:min/1 function that return the max and min element of a list respectively. Then define the mlists:mmlsa/1 that returns the tuple {Min, Max, Length, Sum, Average} and performs only 1 pass of the list.
  2. Convert the functions factorial/1, mlists:append/2, and mlists:flatten/1 to tail recursive. Benchmark them against the non tail recursive.
  3. Write the function mlists:nth(N, List) that returns the Nth element of the List.
  4. Write a non tail and a tail recursive definition for mlists:takewhile(Pred, List) that returns the elements of the List up till the one that returns false when applied to Pred (if any).

Next

Next post will be about Erlang’s predefined modules and Built-in Functions (BIFs).

Series NavigationIntroduction to Erlang : Recursion (1/2)Introduction to Erlang : BIFs & Predefined Modules

Leave a Reply

*

Posts’ Calendar
April 2011
M T W T F S S
« Mar   May »
 123
45678910
11121314151617
18192021222324
252627282930