Cocktail Sort!

Cocktail Sort is something I came to know of recently. Its much better alternative to Bubble Sort and performs much better.

Most folks should be aware that, sorting algorithms have a fancy property. For relatively small input (<20 maybe) elements the naive algorithms, like Bubble, Insertion and now Cocktail can outperform QuickSort and MergeSort etc. And if you don’t know why so, try to figure out 🙂 So how much can we save by using them in QuickSort ?

So I wrote a small program which takes a bunch of randomly generated numbers and records the run time of sorting them with different algorithms viz. std::sort of C++, qsort of C and my own written Cocktail Sort (I did test my code extensively and it seems to work 😀 ). It does seem that Cocktail outperforms Quick and std::sort for <25 elements. Repeatedly running the experiment does confirm this. But I guess the complexity of incorporating this in qsort or std::sort is more painful than the gain 🙂

Plus this is the first C++11 program of my blog! From now on, I will try to use as much as C++11 as I can but not excessive 😀

// You need C++11 for this to compile!
// You should be having it, so compile this with 'g++ -std=c++11 filename.cc -O3'
// ./a.out N1 N2
// where N1 < N2 and the test will run for array sizes of N1, N1 + 1, ... N2
// and produce result for all the iterations.

#include <algorithm>
#include <chrono>
#include <iostream>
#include <random>
#include <vector>

#include <cassert>
#include <cstdlib>
#include <climits>

void CocktailSort(std::vector<int>& num, int start, int end) {
  const int i_end = end - 1;
  bool swapped = false;
  do {
    for (int i = start; i <= i_end; ++i) {
      if (num[i] > num[i + 1]) {
        std::swap(num[i], num[i + 1]);
        swapped = true;
      }
    }
    if (!swapped) break;
    swapped = false;
    for (int i = end - 1; i >= start; --i) {
      if (num[i] > num[i + 1]) {
        std::swap(num[i], num[i + 1]);
        swapped = true;
      }
    }
  } while (swapped);
}

// This sort works by following a simple invariant. Every time the inner
// for loop completes, we place the min and max elements of the iterated
// subarray at the two ends. We need to do this exactly n / 2 times,
// since with each iteration we place 2 elements correctly.
// I saw the animation on wiki page of Cocktail sort and without reading
// the pseudo code thought of this algorithm and 'assumed' this should be
// Cocktail sort. Well, it is not. But for the ~10k runs of comparison, this
// out performs Cocktail sort, so will use this in QSort.
// Above Cocktail is for demo!
void MyBadSort(std::vector<int>& num, const int start, const int end) {
  const int nelem = end - start + 1;
  if (nelem == 1) return;
  if (nelem == 2) {
    if (num[start] > num[end]) std::swap(num[start], num[end]);
    return;
  }
  int found = start;
  const int n_iter = nelem / 2;
  for (int ix = 0; ix < n_iter; ++ix) {
    const int loc = end - ix;
    int min = num[found];
    int max = num[loc];
    if (min > max) {
      std::swap(min, max);
      std::swap(num[found], num[loc]);
    }
    const int upto = end - ix;
    for (int i = found + 1; i < upto; ++i) {
      if (num[i] < min) {
        min = num[i];
        std::swap(num[found], num[i]);
      }
      if (num[i] > max) {
        max = num[i];
        std::swap(num[loc], num[i]);
      }
    }
    ++found;
  }
}

void MyQSortNaive(std::vector<int>& num, const int start, const int end) {
  const int nelem = end - start + 1;
  if (nelem == 1) return;
  if (nelem == 2) {
    if (num[start] > num[end]) std::swap(num[start], num[end]);
    return;
  }
  const int pivot = num[start];
  int pos = start;
  for (int i = start; i <= end; ++i) {
    if (num[i] < pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos > start) MyQSortNaive(num, start, pos - 1);
  for (int i = pos; i <= end; ++i) {
    if (num[i] == pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos < end) MyQSortNaive(num, pos, end);
}

void MyQSort(std::vector<int>& num, const int start, const int end) {
  const int nelem = end - start + 1;
  if (nelem == 1) return;
  if (nelem == 2) {
    if (num[start] > num[end]) std::swap(num[start], num[end]);
    return;
  }
  if (nelem == 3) {
    if (num[start] > num[start + 1]) std::swap(num[start], num[start + 1]);
    if (num[start + 1] > num[end]) std::swap(num[start + 1], num[end]);
    if (num[start] > num[start + 1]) std::swap(num[start], num[start + 1]);
    return;
  }
  if (nelem <= 10) {
    MyBadSort(num, start, end);
    return;
  }
  const int pivot = num[start];
  int pos = start;
  for (int i = start; i <= end; ++i) {
    if (num[i] < pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos > start) MyQSort(num, start, pos - 1);
  for (int i = pos; i <= end; ++i) {
    if (num[i] == pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos < end) MyQSort(num, pos, end);
}

int main(int argc, char** argv) {
  assert(argc == 3);
  const int MOD_VALUE = 1000;  // INT_MAX
  unsigned long long int std_qsort = 0, my_qsort_naive = 0, my_qsort = 0;
  for (int run = std::stoi(argv[1]); run <= std::stoi(argv[2]); ++run) {
    unsigned seed1 = std::chrono::system_clock::now()
        .time_since_epoch().count();
    std::mt19937 rnd(seed1 + run * 1000);
    int N = run;
    std::cout << "\n\nRunning comparison with " << run
              << " elements to sort\n";
    std::vector<int> num;
    num.reserve(N);

    // C++ std::sort
    num.clear();
    for (int i = 0; i < N; ++i) num.push_back(rnd() % MOD_VALUE);
    std::shuffle(num.begin(), num.end(), rnd);
    auto start = std::chrono::high_resolution_clock::now();
    std::sort(num.begin(), num.end());
    auto end = std::chrono::high_resolution_clock::now();
    auto elapsed = std::chrono::duration_cast
      <std::chrono::microseconds>(end - start);
    assert(std::is_sorted(num.begin(), num.end()));
    std_qsort += elapsed.count();
    // std::cout << "Time spent in std::sort: " << elapsed.count() << "\n";

    // My QSort implementation without the Cocktail support
    num.clear();
    for (int i = 0; i < N; ++i) num.push_back(rnd() % MOD_VALUE);
    std::shuffle(num.begin(), num.end(), rnd);
    start = std::chrono::high_resolution_clock::now();
    // qsort(&num[0], num.size(), sizeof(num[0]), cmp);
    MyQSortNaive(num, 0, num.size() - 1);
    end = std::chrono::high_resolution_clock::now();
    elapsed = std::chrono::duration_cast
      <std::chrono::microseconds>(end - start);
    assert(std::is_sorted(num.begin(), num.end()));
    // std::cout << "Time spent in Naive Qsort: " << elapsed.count() << "\n";
    my_qsort_naive += elapsed.count();

    // My QSort with Cocktail support
    num.clear();
    for (int i = 0; i < N; ++i) num.push_back(rnd() % MOD_VALUE);
    std::shuffle(num.begin(), num.end(), rnd);
    start = std::chrono::high_resolution_clock::now();
    MyQSort(num, 0, num.size() - 1);
    end = std::chrono::high_resolution_clock::now();
    elapsed = std::chrono::duration_cast
      <std::chrono::microseconds>(end - start);
    assert(std::is_sorted(num.begin(), num.end()));
    // std::cout << "Time spent in MyQsort with sorter: " << elapsed.count() << "\n";
    my_qsort += elapsed.count();
  }
  std::cout << "\n\n";
  std::cout << "Total time taken for std::sort: " << std_qsort << "\n";
  std::cout << "Total time taken for naive myqsort: " << my_qsort_naive << "\n";
  std::cout << "Total time taken for myqsort: " << my_qsort << "\n";
  return 0;
}

You should try to run this for small N eg. ./a.out 10 12 or ./a.out 12 15 etc.

Advertisements

Knuth Morrison Pratt (KMP) String Matching Algorithm

I have always wanted to understand the KMP Algorithm and write a program for it and finally I got around doing it. I had this big misconception that its a very tedious algorithm and does some black magic but, as with all other Knuth’s algorithms, this is a beauty 🙂

Following is a working 😀 program, which does not handle corner cases, empty strings and UTF-8 etc. But works for sane inputs.

#include <iostream>
#include <string>
#include <cstring>
#include <vector>
using namespace std;

vector<int> BuildLookupTable(const string& needle) {
  vector<int> ret_val;
  ret_val.resize(needle.length());
  size_t pos = 2, prefix_pos = 0;
  ret_val[0] = -1;
  ret_val[1] = 0;
  while (pos < needle.length()) {
    if (needle.at(pos - 1) == needle.at(prefix_pos)) {
      ret_val[pos++] = ++prefix_pos;
    } else if (prefix_pos > 0) {
      prefix_pos = ret_val[prefix_pos];
    } else {
      ret_val[pos++] = 0;
    }
  }
  return ret_val;
}

string FindNeedleInHaystackUsingKMP(const string& needle,
                                    const string& haystack) {
  vector<int> lookup_table = BuildLookupTable(needle);
  size_t haystack_index = 0;
  size_t needle_index = 0;
  while (haystack_index + needle_index < haystack.length()) {
    if (haystack.at(haystack_index + needle_index)
        == needle.at(needle_index)) {
      ++needle_index;
      if (needle_index == needle.length()) return "Found.";
    } else {
      haystack_index = haystack_index + needle_index
          - lookup_table[needle_index];
      if (lookup_table[needle_index] == -1) {
        needle_index = 0;
      } else {
        needle_index = lookup_table[needle_index];
      }
    }
  }
  return "Not Found.";
}

int main() {
  string needle, haystack;
  cout << "Enter the needle: ";
  getline(std::cin, needle);
  cout << "Enter the haystack: ";
  getline(std::cin, haystack);
  cout << FindNeedleInHaystackUsingKMP(needle, haystack) << "\n";
  char* ret_val = strstr(haystack.c_str(), needle.c_str());
  if (ret_val == NULL) {
    cout << "(Verifier) : Not Found\n";
  } else {
    cout << "(Verifier) : Found\n";
  }
}

%d bloggers like this: