Introduciton to Erlang : Recursion (2/2)

April 5, 2011 |

Author Trigonakis Vasileios

Accumulators

In several cases, as with the mlists:length/1 example, the non-tail recursive function can be easily turned to a tail recursive one by using the notion of accumulator. An accumulator is an extra argument introduced to a function in order to aggregate the partial results of the function. It turns the “bottom-up” collection of the final result to “top-down”.

In order to add and initialize the accumulator argument one has to introduce an extra function definition.

tlr(...) ->
    tlr(..., Accumulator_initial_value).
 
% the clause that "breaks" the recursion and
% returns the result
tlr(..., Accumulator) -> 
    Accumulator;
tlr(..., Accumulator) ->
    ...,
    Accumulator_new_value = ...,
    ...,
    trl(..., Accumulator_new_value).

Notice that typically you would only export the tlr/1 function and the tlr/2 would remain for inner-module use and not visible to the module’s users.

Examples

mlists:reverse/1

% a benchmark function in order to write
% one argument less :-)
bm(Fun, Args) ->
    % the ?MODULE can be used instead of the
    % name of the module
    timer:tc(?MODULE, Fun, Args).
 
% non tail recursive
reverse([]) ->
    [];
reverse([H | T]) ->
    reverse(T) ++ [H].
 
% tail recursive
% create an extra help function that will include
% the accumulator as an extra argument
reverse_tr(List) ->
    % initialize the accumulator to the proper value
    reverse_tr(List, []).
 
% the help function
% when finished, return the Accumulator
reverse_tr([], Acc) ->
    Acc;
reverse_tr([H | T], Acc) ->
    reverse_tr(T, [H | Acc]).

Lets benchmark it!

1> L = lists:seq(1, 1000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
2> mlists:bm(reverse, [L]).
{33519,
 [1000,999,998,997,996,995,994,993,992,991,990,989,988,987,
  986,985,984,983,982,981,980,979,978,977,976,975,974|...]}
3> mlists:bm(reverse_tr, [L]).
{64,
 [1000,999,998,997,996,995,994,993,992,991,990,989,988,987,
  986,985,984,983,982,981,980,979,978,977,976,975,974|...]}
4> L1 = lists:seq(1, 100000). 
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
5> mlists:bm(reverse, [L1]).  
{114642276,
 [100000,99999,99998,99997,99996,99995,99994,99993,99992,
  99991,99990,99989,99988,99987,99986,99985,99984,99983,99982,
  99981,99980,99979,99978,99977,99976,99975,99974|...]}
6> mlists:bm(reverse_tr, [L1]).
{3450,
 [100000,99999,99998,99997,99996,99995,99994,99993,99992,
  99991,99990,99989,99988,99987,99986,99985,99984,99983,99982,
  99981,99980,99979,99978,99977,99976,99975,99974|...]}

In this case you can notice the great performance difference between the tail and non tail recursive solutions.

mlists:seq/2

You have seen me using the lists:seq/2 function frequently. A call to lists:seq(Argument1, Argument2) is used to generate a list that consists of the integers from the sequence Argument1, Argument1 + 1, Argument1 + 2, .., Argument2.

% non tail recursive
seq(L, H) when L > H ->
    [];
seq(L, H) ->
    [L | seq(L + 1, H)].
 
% tail recursive
seq_tr(L, H) ->
    seq_tr(L, H, []).
 
seq_tr(L, H, Accumulator) when L > H ->
    Accumulator;
seq_tr(L, H, Accumulator) ->
    seq_tr(L, H - 1, [H | Accumulator]).
 
% benchmarking
2> mlists:bm(seq, [1, 1000]).
{187,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
3> mlists:bm(seq_tr, [1, 1000]).
{160,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
4> mlists:bm(seq, [1, 10000]).  
{3799,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
5> mlists:bm(seq_tr, [1, 10000]).
{2676,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
6> mlists:bm(seq, [1, 100000]).  
{23706,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
7> mlists:bm(seq_tr, [1, 100000]).
{15528,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}

mlists:sum/1

% non tail recursive
sum([]) ->
    0;
sum([H | T]) when is_number(H) ->
    H + sum(T).
 
% tail recursive
sum_tr(List) ->
    sum_tr(List, 0).
 
sum_tr([], Sum) ->
    Sum;
sum_tr([H | T], Sum) when is_number(H) ->
    sum_tr(T, H + Sum).
 
% benchmarking
1> L = mlists:seq_tr(1, 10000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
2> mlists:bm(sum, [L]).
{2387,50005000}
3> mlists:bm(sum_tr, [L]).
{803,50005000}
4> L1 = mlists:seq_tr(1, 100000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
5> mlists:bm(sum, [L1]).         
{33556,5000050000}
6> mlists:bm(sum_tr, [L1]).
{26847,5000050000}

mlists:average/1

% solution using non tail recursive functions
average([]) ->
    0;
average(List) ->
    sum(List) / mlists:length(List).
 
% solution using tail recursive functions
average_tr([]) ->
    0;
average_tr(List) ->
    sum_tr(List) / length_tr(List).
 
% optimized tail recursive solution
average_tr1(List) ->
    average_tr1(List, 0, 0).
 
average_tr1([], Sum, Length) ->
    Sum / Length;
average_tr1([H | T], Sum, Length) ->
    average_tr1(T, Sum + H, Length + 1).
 
% benchmarking
2> L = mlists:seq_tr(1, 100000).
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
 23,24,25,26,27,28,29|...]
3> mlists:bm(average, [L]).
{40654,50000.5}
4> mlists:bm(average_tr, [L]).
{28388,50000.5}
5> mlists:bm(average_tr1, [L]).
{24258,50000.5}

fib/1

You can find a detailed post about Fibonacci sequence here. I duplicate some results here in order to show you that, some times, converting the function to tail recursive is the only way.

% non tail recursive
fib(0) ->
    0;
fib(1) ->
    1;
fib(N) ->
    fib(N-1) + fib(N-2).
 
% tail recursive
fib_tr(0) ->
    0;
fib_tr(1) ->
    1;
fib_tr(N) ->
    fib_tr(0, 1, 2, N).
 
fib_tr(L, H, C, N) when C >= N ->
    L + H;
fib_tr(L, H, C, N) ->
    fib_tr(H, L + H, C + 1, N).
 
% benchmarking
2> timer:tc(fib, fib, [20]).
{4252,6765}
3> timer:tc(fib, fib_tr, [20]).
{7,6765}
4> timer:tc(fib, fib, [25]).   
{19440,75025}
5> timer:tc(fib, fib_tr, [25]).
{7,75025}
6> timer:tc(fib, fib_tr, [2500]).
{1925,
 1317090516751949629522763087125316412066606964992507141887746936727530870405038425764503130123186407746570862185871925952766836352119119528156315582632460790383834605654880612657718465632568839245978248473058179422070735553124716385450886640552392273856770672239797164264356927661308349671941673643205733343592701716715788255170679575500279186053316365583259186927359351023387298371686222860827415371443553759953659514120882763808142593366402472251348360008915585215291504984371697523871199553935714056959634778700594751875
}

In this case, the difference is so huge because, apart from the lack of tail recursion, the first solution has repetition of computations. What I mean is that in order to calculate fib(5), for example, fib(3) will be calculated twice (once for fib(5) = fib(4) + fib(3) and once for fib(4) = fib(3) + fib(2)).

General Recursion Examples

In the following examples I will not do conversion from non tail recursive to tail recursive.

factorial/1

factorial(0) ->
    1;
factorial(N) ->
    N * factorial(N - 1).

mlists:append/2

A call to append(List1, List2) appends the elements from List1 to the beginning of List2.

append([], List) ->
    List;
append([H | T], List) ->
    [H | append(T, List)].
 
% examples
1> mlists:append([], [7]).
[7]
2> mlists:append([1], [7, 11, 13]). 
[1,7,11,13]
3> mlists:append([1, 2, 3], [7, 11, 13]).
[1,2,3,7,11,13]

mlists:all/2

all(Pred, List) returns true if all items belonging to the the List return true for the predicate Pred.

all(_, []) ->
    true;
all(Pred, [H | T]) ->
    case Pred(H) of
	true ->
	    all(Pred, T);
	_ ->
	    false
    end.
 
% example
1> L = [1, 2, 3, 4, 5].      
[1,2,3,4,5]
2> mlists:all(fun(X) -> X < 6 end, L).
true
3> mlists:all(fun(X) -> X < 4 end, L).
false

mlists:delete/2

delete(Elem, List) returns the List without the first occurrence of element Elem (if any).

delete(_, []) ->
    [];
delete(Elem, [Elem | T]) ->
    T;
delete(Elem, [_Elem | T]) ->
    delete(Elem, T).

mlists:flatten/1

flatten(List) returns the flattened version of the List.

flatten([]) ->
    [];
flatten([[H | T] | TT]) ->
    flatten([H | T ++ TT]);
flatten([[] | T]) ->
    flatten(T);
flatten([H | T]) ->
    [H | flatten(T)].
 
% examples
1> mlists:flatten([1,2,3]).
[1,2,3]
2> mlists:flatten([1,[2],3]).
[1,2,3]
3> mlists:flatten([1,[2, a, [b, [c 1="d," 2="e" language=","][/c]]], 3, [1, {2, [3, 6]}]]).
[1,2,a,b,c,d,e,3,1,{2,[3,6]}]

Sorting

mlists:mergesort/1

merge([], []) ->
    [];
merge(List, []) ->
    List;
merge([], List) ->
    List;
merge([H1 | T1], L2 = [H2 | _]) when H1 < H2 ->
    [H1 | merge(T1, L2)];
merge(L1, [H2 | T2]) ->
    [H2 | merge(L1, T2)].
 
split(N, List) ->
    split(N, List, []).
 
split(0, List, List2) ->
    {reverse_tr(List2), List};
split(N, [H | T], List2) ->
    split(N - 1, T, [H | List2]).
 
mergesort([]) ->
    [];
mergesort([I]) ->
    [I];
mergesort(List) ->
    Middle = length_tr(List) div 2,
    {L1, L2} = split(Middle, List),
    merge(mergesort(L1), mergesort(L2)).

mlists:quicksort

split_pivot([]) ->
    [];
split_pivot([H | T]) ->
    split_pivot(H, T, [], []).
 
split_pivot(Pivot, [], Lower, Higher) ->
    {Lower, Pivot, Higher};
split_pivot(Pivot, [H | T], Lower, Higher) when H < Pivot ->
    split_pivot(Pivot, T, [H | Lower], Higher);
split_pivot(Pivot, [H | T], Lower, Higher) ->
    split_pivot(Pivot, T, Lower, [H | Higher]).
 
quicksort([]) ->
    [];
quicksort(List = [_ | _]) ->
    {Lower, Pivot, Higher} = split_pivot(List),
    append(quicksort(Lower), [Pivot | quicksort(Higher)]).

Benchmarking Merge/Quick-sort

Worst-case performance for quicksort is O(n²), while for mergesort is O(n * log(n)).

1> L = mlists:reverse_tr(mlists:seq_tr(1, 1000)).
[1000,999,998,997,996,995,994,993,992,991,990,989,988,987,
 986,985,984,983,982,981,980,979,978,977,976,975,974,973,972|...]
2> mlists:bm(mergesort, [L]).
{5675,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
3> mlists:bm(quicksort, [L]).
{65940,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
4> L1 = mlists:reverse_tr(mlists:seq_tr(1, 10000)).
[10000,9999,9998,9997,9996,9995,9994,9993,9992,9991,9990,
 9989,9988,9987,9986,9985,9984,9983,9982,9981,9980,9979,9978,
 9977,9976,9975,9974,9973,9972|...]
5> mlists:bm(mergesort, [L1]).                     
{48904,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}
6> mlists:bm(quicksort, [L1]).                     
{6045564,
 [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
  23,24,25,26,27|...]}

Excercises

Try to play with the following examples. They are all simple!

Write the mlists:max/1 and mlists:min/1 function that return the max and min element of a list respectively. Then define the mlists:mmlsa/1 that returns the tuple {Min, Max, Length, Sum, Average} and performs only 1 pass of the list.
Convert the functions factorial/1, mlists:append/2, and mlists:flatten/1 to tail recursive. Benchmark them against the non tail recursive.
Write the function mlists:nth(N, List) that returns the Nth element of the List.
Write a non tail and a tail recursive definition for mlists:takewhile(Pred, List) that returns the elements of the List up till the one that returns false when applied to Pred (if any).

Next post will be about Erlang’s predefined modules and Built-in Functions (BIFs).

Series NavigationIntroduction to Erlang : Recursion (1/2)Introduction to Erlang : BIFs & Predefined Modules

Posted in Erlang, Erlang Series, Programming |

Tags: erlang, recursion, series

Distributed Life