Thursday, July 30, 2015

Quicksort implementation compilation

I made a compilation of different known quick sort partitioning methods, implementations and pivot choosing methods.

Partitioning


  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
void Bentley_McIlroy(int*& lo, int*& hi)
    {
         using std::swap;

         int pivot = *lo;
         int *j = lo, *i = hi--, *p = lo + 1, *q = hi;

         while (true)
         {
             while (pivot < *(--i));
             while (*(++j) < pivot) if (j == hi) break;
             if (j >= i) break;     
             swap(*i, *j);

             if (*i == pivot) swap(*q--, *i);
             if (*j == pivot) swap(*p++, *j);
         }
         int min = std::min(p - lo, i - p + 1);
         for (int f = 0; f < min; ++f)
             swap(*(lo + f), *(i - f));

         int min2 = std::min(hi - q, q - i);
         for (int f = 1; f < min2; ++f)
             swap(*(hi - f), *(i + f));

         lo = ++i - (p - lo);
         hi = i + (hi - q); 
    }

     int* lomutoPartition(int* p, int* r)
     {
         using std::swap;

         int pivot = *(--r), *i = p - 1;

         for (int* j = p; j != r; ++j)
         {
             if (*j <= pivot)
                 swap( *(++i), *j);
         }
         swap(*r, *(++i));
         return i;
     }

     int* hoare(int *lo, int *hi)
     {
         using std::swap;
         int pivot = *lo, *i = lo, *j = hi--;

         while (true)
         {
             while (*(++i) < pivot) if (i == hi) break;
             while (pivot < *(--j) );
            
             if (j <= i) break;
             swap(*i, *j);
         }
         swap(*j, *lo);

         return j;
     }


     void dijkstra_3_way_partitioning(int*& l, int*& g)
     {
         using std::swap;
        
         int *i = l + 1, pivot = *l;        
         while (i < g)
         {
             if (*i < pivot)
                 swap(*i++, *l++);
             else if (*i > pivot)
                 swap(*i, *(--g));
             else
                 i++;
         }
     }

     void dual_pivot_partitioning(int*& L, int*& G)
     {
         using std::swap;
        
        if (*L > *(--G)) swap( *L, *G);

         int pivot1 = *L, *first = L,
             *last = G, pivot2 = *G--, *K = L++;

         while (K <= G)
         {
             if (*K < pivot1)
                 swap(*L++, *K++);
             else if (*K > pivot2)
                 swap(*G--, *K);
             else
                 ++K;
         }
         swap(*(--L), *first);
         swap(*(++G), *last);
     }

Pivot and shuffle


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
std::random_device rd;
std::mt19937 mt(rd());

int* random(int *ini, int *fin)
{
    std::uniform_int_distribution <> dist(0, fin - ini - 1);
    return ini + dist(mt);
}

int* randomMedianOfN(int * ini, int * fin, int N)
{
    if (fin - ini < N) return ini;

    for (int * i = ini; i != ini + N; ++i)
    {
        int * r = Pivots::random(i, fin);
        std::swap(*i, *r);
    }
    std::nth_element(ini, ini + N / 2, ini + N);

    return ini + 1;
}

void fisher_yates_shuffle(int *ini, int *fin)
{
    for (auto i = ini; ini != fin; ++ini)
        std::swap(*Pivots::random(ini, fin), *ini);
}

Quicksort



 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
void B_M_sort(int* ini, int* fin)
{
    if (ini >= fin - 1) return;

    auto ini2 = ini, fin2 = fin;
    Particions::Bentley_McIlroy(ini2, fin2);

    B_M_sort(ini, ini2);
    B_M_sort(fin2, fin);
}

void sort(int* ini, int *fin)
{
    if (ini >= fin - 1) return;

    auto pivot = Particions::hoare(ini, fin);

    sort(ini, pivot);
    sort(pivot + 1, fin);
}

void Lomutosort(int* ini, int *fin)
{
    if (ini >= fin - 1) return;

    auto pivot = Particions::lomutoPartition(ini, fin);

    sort(ini, pivot);
    sort(pivot + 1, fin);
}

void javaSort(int* ini, int *fin)
{
    if (ini >= fin - 1) return;

    auto ini2 = ini, fin2 = fin;
    Particions::dual_pivot_partitioning(ini2, fin2);

    javaSort(ini, ini2);
    if(*ini2 != *fin2) 
        javaSort(ini2 + 1, fin2);
    javaSort(fin2 + 1, fin);
}

void iterativeSort(int* ini, int* fin)
{

    unsigned MaxLength = 2 * static_cast<unsigned> (std::log2l(fin - ini));
    Stack<int*> stack(MaxLength);
    stack.push(nullptr);
    stack.push(nullptr);

    while (!stack.empty())
    {
        while (ini < fin - 1)
        {
            auto ini2 = ini, fin2 = fin;
            Particions::Bentley_McIlroy(ini2, fin2);
            if ((ini2 - ini) >(fin - fin2))
            {
                stack.push(ini2);
                stack.push(ini);
                ini = fin2;
            }
            else
            {
                stack.push(fin);
                stack.push(fin2);
                fin = ini2;
            }
        }
        ini = stack.pop();
        fin = stack.pop();
    }
}

Exercise 4.1-5

4.1-5 Use the following ideas to develop a nonrecursive, linear-time algorithm for the maximum-subarray problem. Start at the left end of the array, and progress toward the right,keeping track of the maximum subarray seen so far. Knowing a maximum subarray of A[1.. j] , extend the answer to find a maximum subarray ending at index j + 1 by using the following observation: a maximum subarray of A[1.. j + 1] is either a maximum subarray of A[1.. j] or a subarray A[i .. j +1] , for some 1 <= i<= j + 1. Determine a maximum subarray of the form A[i... j+1] in constant time based on knowing a maximum subarray ending at index j.

Answer

First we need to figure out the maximum sub array ending at index j + 1 which could be just A[j + 1] or the maximum subarray ending at j plus A[j+1] , therefore we find the max of this two.

Once we have the maximum sub array ending at index j + 1 we find the maximum of A[1..j+1] by again, getting the max between  maximum sub array ending at index j + 1 and maximum subarray of A[1..j].

Code (C++):

void linearMaxSubArray(int*& ini, int*& fin){

    int sum = ini[0], max = ini[0],
        *inisum = ini, *end= fin;

    fin = ini + 1;
    for (auto i = ini + 1; i != end; ++i)
    {
        sum = std::max(*i, *i + sum);  
        inisum = sum == *i ? i : inisum;   
        if (sum > max)
        {
            fin = i + 1;
            ini = inisum;
            max = sum;
        }
    }
   
}

Sunday, July 19, 2015

Problem 6-2 Analisis of d-ary heaps

6-2 Analysis of d-ary heaps A d-ary heap is like a binary heap, but (with one possible exception) non-leaf nodes have d children instead of 2 children.

a. How would you represent a d-ary heap in an array?

b. What is the height of a d-ary heap of n elements in terms of n and d?

c. Give an efficient implementation of EXTRACT-MAX in a d-ary max-heap. Analyze its running time in terms of d and n.

d. Give an efficient implementation of INSERT in a d-ary max-heap. Analyze its running time in terms of d and n.

e. Give an efficient implementation of INCREASE-KEY(A,i,k), which flags an error if k is less than key at index i.

Answer



a) Being f a floor of the heap, the last index of a floor that is full is:



each index k that is in a floor i is bound by:



Using this bounds we can prove it for any element of floor i. 
 x being the index of a given element of floor i we have:

For the parent we have:



Proof;




b) For this question we can do the same we did for the proof of a binary heap height. Being h the height of the heap we can bound n in terms of h and d:


And so the height is:


d) Extract_max is Max_heapify plus constant time  and Max_heapify for each recursive call needs to find the largest of d child, so the running time is:

c) and d)

Both are the running time of swim plus constant time. So their running time is:


d-ary heap implementation in C++(The root of the heap is zero instead of one):

#pragma once
#include <vector>
#include <iostream>
#include <algorithm>

template<class T, class Func= std::less<T>>
class DHeap
{

public:
    DHeap();
    DHeap(unsigned);
    DHeap(unsigned,const Func&);
    DHeap(const Func&);


    T extractTop();
    void insert(const T&);
    void insert(T&&);
    const T& top();
    T changeKey(unsigned, T&&);
    bool empty() const{ return vec.empty(); }

private:
    std::vector<T> vec;
    Func cmp;
    int D;

    void heapify(unsigned);
    void swim(int);
    unsigned maxIndex(unsigned, unsigned) const;
    unsigned firstChild(unsigned) const;
    int parent(int) const;
    void ensureCapacity();
    unsigned findMax(unsigned first, unsigned to) const;
    template<typename U>
    void genericInsert(U&&);
   
    template< typename E>
    friend std::ostream & operator<<(std::ostream &os, const DHeap<T,E>& p)
    {
        for (const T& d : p.vec)
            os << d << ",";
        return os;
    }
};

template <typename T, typename F >
DHeap<T,F>::DHeap() : DHeap(4){}

template <typename T, typename F >
DHeap<T, F>::DHeap(unsigned d) : DHeap(d, F()){}

template <typename T, typename F >
DHeap<T, F>::DHeap(unsigned d, const F& func):D(d),cmp(func){}

template <typename T, typename F >
DHeap<T, F>::DHeap(const F& func) : DHeap(4, func){}

template <typename T, typename F >
T DHeap<T,F>::extractTop()
{
    if (vec.size() < 1)
        throw new std::underflow_error("underflow");
       
    T max = std::move(vec[0]);
    vec[0] = std::move(vec.back());
    vec.pop_back();
    heapify(0);
   
    return max;
}
template<typename T, typename F>
const T& DHeap<T,F>::top()
{
    return vec[0];
}

template<typename T, typename F>
template<typename U>
void DHeap<T,F>::genericInsert(U&& element)
{
    ensureCapacity();
    vec.push_back(std::forward<U>(element));
    swim(vec.size() - 1);
}

template <typename T, typename F >
void DHeap<T,F>::insert(const T& element)
{
    genericInsert(element);
}

template <typename T, typename F >
void DHeap<T,F>::insert(T&& element)
{
    genericInsert(std::move(element));
}

template <typename T, typename F >
T DHeap<T,F>::changeKey(unsigned index, T&& element)
{
    if (cmp(element, vec[index]))
        throw new std::invalid_argument("invalid argument");

    T old = std::move(vec[index]);
    vec[index] = std::move(element);
    swim(index);

    return old;
}

template <typename T, typename F >
void DHeap<T,F>::heapify(unsigned index)
{
    unsigned first = 0;
    while ((first = firstChild(index)) < vec.size())
    {
        unsigned to = std::min(vec.size(), firstChild(index + 1));
        unsigned largest = findMax(first, to);

        if (!cmp(vec[index], vec[largest])) break;

        using std::swap;
        swap(vec[largest], vec[index]);
        index = largest;
    }
}

template<typename T, typename F>
unsigned DHeap<T,F>::maxIndex(unsigned a, unsigned b) const
{
    return cmp(vec[a], vec[b])? b : a;
}


template <typename T, typename F >
void DHeap<T,F>::swim(int index)
{
    using std::swap;
   
    int p = 0;
    while (index > 0 && cmp(vec[p = parent(index)], vec[index]))
    {
        swap(vec[p], vec[index]);
        index = p;
    }

}
template <typename T, typename F >
unsigned DHeap<T,F>::findMax(unsigned first, unsigned to) const
{
    unsigned largest = first++;
    while (first < to)
        largest = maxIndex(largest, first++);
   
    return largest;
}

template <typename T, typename F >
unsigned DHeap<T,F>::firstChild(unsigned index) const
{
    return D* index + 1;
}
template <typename T, typename F >
int DHeap<T,F>::parent(int index) const
{
    return (index - 1) / D;

}
template <typename T, typename F >
void DHeap<T,F>::ensureCapacity()
{
    if (vec.size() == vec.capacity())
        vec.reserve(3 * vec.size() / 2 + 1);
}
 

Sunday, July 12, 2015

Exercise 6.3-3


Show that there are at most nodes of height h in any n-element heap.

Answer

First we know that  nodes are leaves because of exercise 6.1-7. So being f(h) a function that gives us the number of nodes with height h of a heap of length n
we need to proof by induction that:
for the base case we have:

this holds because all the leaves have height 0. Let x be the number of nodes in a heap so that
so the number of leaves is

finally for inductive step we have:



Note: basically the idea is to "remove" each layer of leaves until we reach the ones with de desire heigth.