Cocktail Sort!

Cocktail Sort is something I came to know of recently. Its much better alternative to Bubble Sort and performs much better.

Most folks should be aware that, sorting algorithms have a fancy property. For relatively small input (<20 maybe) elements the naive algorithms, like Bubble, Insertion and now Cocktail can outperform QuickSort and MergeSort etc. And if you don’t know why so, try to figure out 🙂 So how much can we save by using them in QuickSort ?

So I wrote a small program which takes a bunch of randomly generated numbers and records the run time of sorting them with different algorithms viz. std::sort of C++, qsort of C and my own written Cocktail Sort (I did test my code extensively and it seems to work 😀 ). It does seem that Cocktail outperforms Quick and std::sort for <25 elements. Repeatedly running the experiment does confirm this. But I guess the complexity of incorporating this in qsort or std::sort is more painful than the gain 🙂

Plus this is the first C++11 program of my blog! From now on, I will try to use as much as C++11 as I can but not excessive 😀

// You need C++11 for this to compile!
// You should be having it, so compile this with 'g++ -std=c++11 filename.cc -O3'
// ./a.out N1 N2
// where N1 < N2 and the test will run for array sizes of N1, N1 + 1, ... N2
// and produce result for all the iterations.

#include <algorithm>
#include <chrono>
#include <iostream>
#include <random>
#include <vector>

#include <cassert>
#include <cstdlib>
#include <climits>

void CocktailSort(std::vector<int>& num, int start, int end) {
  const int i_end = end - 1;
  bool swapped = false;
  do {
    for (int i = start; i <= i_end; ++i) {
      if (num[i] > num[i + 1]) {
        std::swap(num[i], num[i + 1]);
        swapped = true;
      }
    }
    if (!swapped) break;
    swapped = false;
    for (int i = end - 1; i >= start; --i) {
      if (num[i] > num[i + 1]) {
        std::swap(num[i], num[i + 1]);
        swapped = true;
      }
    }
  } while (swapped);
}

// This sort works by following a simple invariant. Every time the inner
// for loop completes, we place the min and max elements of the iterated
// subarray at the two ends. We need to do this exactly n / 2 times,
// since with each iteration we place 2 elements correctly.
// I saw the animation on wiki page of Cocktail sort and without reading
// the pseudo code thought of this algorithm and 'assumed' this should be
// Cocktail sort. Well, it is not. But for the ~10k runs of comparison, this
// out performs Cocktail sort, so will use this in QSort.
// Above Cocktail is for demo!
void MyBadSort(std::vector<int>& num, const int start, const int end) {
  const int nelem = end - start + 1;
  if (nelem == 1) return;
  if (nelem == 2) {
    if (num[start] > num[end]) std::swap(num[start], num[end]);
    return;
  }
  int found = start;
  const int n_iter = nelem / 2;
  for (int ix = 0; ix < n_iter; ++ix) {
    const int loc = end - ix;
    int min = num[found];
    int max = num[loc];
    if (min > max) {
      std::swap(min, max);
      std::swap(num[found], num[loc]);
    }
    const int upto = end - ix;
    for (int i = found + 1; i < upto; ++i) {
      if (num[i] < min) {
        min = num[i];
        std::swap(num[found], num[i]);
      }
      if (num[i] > max) {
        max = num[i];
        std::swap(num[loc], num[i]);
      }
    }
    ++found;
  }
}

void MyQSortNaive(std::vector<int>& num, const int start, const int end) {
  const int nelem = end - start + 1;
  if (nelem == 1) return;
  if (nelem == 2) {
    if (num[start] > num[end]) std::swap(num[start], num[end]);
    return;
  }
  const int pivot = num[start];
  int pos = start;
  for (int i = start; i <= end; ++i) {
    if (num[i] < pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos > start) MyQSortNaive(num, start, pos - 1);
  for (int i = pos; i <= end; ++i) {
    if (num[i] == pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos < end) MyQSortNaive(num, pos, end);
}

void MyQSort(std::vector<int>& num, const int start, const int end) {
  const int nelem = end - start + 1;
  if (nelem == 1) return;
  if (nelem == 2) {
    if (num[start] > num[end]) std::swap(num[start], num[end]);
    return;
  }
  if (nelem == 3) {
    if (num[start] > num[start + 1]) std::swap(num[start], num[start + 1]);
    if (num[start + 1] > num[end]) std::swap(num[start + 1], num[end]);
    if (num[start] > num[start + 1]) std::swap(num[start], num[start + 1]);
    return;
  }
  if (nelem <= 10) {
    MyBadSort(num, start, end);
    return;
  }
  const int pivot = num[start];
  int pos = start;
  for (int i = start; i <= end; ++i) {
    if (num[i] < pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos > start) MyQSort(num, start, pos - 1);
  for (int i = pos; i <= end; ++i) {
    if (num[i] == pivot) {
      std::swap(num[pos], num[i]);
      ++pos;
    }
  }
  if (pos < end) MyQSort(num, pos, end);
}

int main(int argc, char** argv) {
  assert(argc == 3);
  const int MOD_VALUE = 1000;  // INT_MAX
  unsigned long long int std_qsort = 0, my_qsort_naive = 0, my_qsort = 0;
  for (int run = std::stoi(argv[1]); run <= std::stoi(argv[2]); ++run) {
    unsigned seed1 = std::chrono::system_clock::now()
        .time_since_epoch().count();
    std::mt19937 rnd(seed1 + run * 1000);
    int N = run;
    std::cout << "\n\nRunning comparison with " << run
              << " elements to sort\n";
    std::vector<int> num;
    num.reserve(N);

    // C++ std::sort
    num.clear();
    for (int i = 0; i < N; ++i) num.push_back(rnd() % MOD_VALUE);
    std::shuffle(num.begin(), num.end(), rnd);
    auto start = std::chrono::high_resolution_clock::now();
    std::sort(num.begin(), num.end());
    auto end = std::chrono::high_resolution_clock::now();
    auto elapsed = std::chrono::duration_cast
      <std::chrono::microseconds>(end - start);
    assert(std::is_sorted(num.begin(), num.end()));
    std_qsort += elapsed.count();
    // std::cout << "Time spent in std::sort: " << elapsed.count() << "\n";

    // My QSort implementation without the Cocktail support
    num.clear();
    for (int i = 0; i < N; ++i) num.push_back(rnd() % MOD_VALUE);
    std::shuffle(num.begin(), num.end(), rnd);
    start = std::chrono::high_resolution_clock::now();
    // qsort(&num[0], num.size(), sizeof(num[0]), cmp);
    MyQSortNaive(num, 0, num.size() - 1);
    end = std::chrono::high_resolution_clock::now();
    elapsed = std::chrono::duration_cast
      <std::chrono::microseconds>(end - start);
    assert(std::is_sorted(num.begin(), num.end()));
    // std::cout << "Time spent in Naive Qsort: " << elapsed.count() << "\n";
    my_qsort_naive += elapsed.count();

    // My QSort with Cocktail support
    num.clear();
    for (int i = 0; i < N; ++i) num.push_back(rnd() % MOD_VALUE);
    std::shuffle(num.begin(), num.end(), rnd);
    start = std::chrono::high_resolution_clock::now();
    MyQSort(num, 0, num.size() - 1);
    end = std::chrono::high_resolution_clock::now();
    elapsed = std::chrono::duration_cast
      <std::chrono::microseconds>(end - start);
    assert(std::is_sorted(num.begin(), num.end()));
    // std::cout << "Time spent in MyQsort with sorter: " << elapsed.count() << "\n";
    my_qsort += elapsed.count();
  }
  std::cout << "\n\n";
  std::cout << "Total time taken for std::sort: " << std_qsort << "\n";
  std::cout << "Total time taken for naive myqsort: " << my_qsort_naive << "\n";
  std::cout << "Total time taken for myqsort: " << my_qsort << "\n";
  return 0;
}

You should try to run this for small N eg. ./a.out 10 12 or ./a.out 12 15 etc.

Knuth Morrison Pratt (KMP) String Matching Algorithm

I have always wanted to understand the KMP Algorithm and write a program for it and finally I got around doing it. I had this big misconception that its a very tedious algorithm and does some black magic but, as with all other Knuth’s algorithms, this is a beauty 🙂

Following is a working 😀 program, which does not handle corner cases, empty strings and UTF-8 etc. But works for sane inputs.

#include <iostream>
#include <string>
#include <cstring>
#include <vector>
using namespace std;

vector<int> BuildLookupTable(const string& needle) {
  vector<int> ret_val;
  ret_val.resize(needle.length());
  size_t pos = 2, prefix_pos = 0;
  ret_val[0] = -1;
  ret_val[1] = 0;
  while (pos < needle.length()) {
    if (needle.at(pos - 1) == needle.at(prefix_pos)) {
      ret_val[pos++] = ++prefix_pos;
    } else if (prefix_pos > 0) {
      prefix_pos = ret_val[prefix_pos];
    } else {
      ret_val[pos++] = 0;
    }
  }
  return ret_val;
}

string FindNeedleInHaystackUsingKMP(const string& needle,
                                    const string& haystack) {
  vector<int> lookup_table = BuildLookupTable(needle);
  size_t haystack_index = 0;
  size_t needle_index = 0;
  while (haystack_index + needle_index < haystack.length()) {
    if (haystack.at(haystack_index + needle_index)
        == needle.at(needle_index)) {
      ++needle_index;
      if (needle_index == needle.length()) return "Found.";
    } else {
      haystack_index = haystack_index + needle_index
          - lookup_table[needle_index];
      if (lookup_table[needle_index] == -1) {
        needle_index = 0;
      } else {
        needle_index = lookup_table[needle_index];
      }
    }
  }
  return "Not Found.";
}

int main() {
  string needle, haystack;
  cout << "Enter the needle: ";
  getline(std::cin, needle);
  cout << "Enter the haystack: ";
  getline(std::cin, haystack);
  cout << FindNeedleInHaystackUsingKMP(needle, haystack) << "\n";
  char* ret_val = strstr(haystack.c_str(), needle.c_str());
  if (ret_val == NULL) {
    cout << "(Verifier) : Not Found\n";
  } else {
    cout << "(Verifier) : Found\n";
  }
}

Largest sum subarray

Recursion: M[i] = max sum subarray ending here

M[i] = max(M[i-1] + a[i], a[i])

Loved this code 🙂

#include<algorithm>
#include<iostream>

using namespace std;

int main() {
  int n;
  cin >> n;
  int max_sum = 0, run_sum = 0;
  for (int i = 0, t = -1; i < n; ++i) {
    cin >> t;
    run_sum = max(run_sum + t, t);
    max_sum = max(run_sum, max_sum);
  }
  cout << max_sum << endl;
  return 0;
}

Simplicity

How would you define an unsigned int of 32 bits (you can imagine about 64 bits) to be infinite?

some might

const unsigned int kMaxUnsignedInt = 0xFFFFFFFF;

Others (shifting lovers) may

const unsigned int kMaxUnsignedInt =  (((1<<30) - 1) << 2 ) | 3;

Few may

const unsigned int kMaxUnsignedInt = ~0;

I did

const unsigned int kMaxUnsignedInt = -1;

What would you do?

On a side note
What would the following code do?

const unsigned int kMaxUnsigndInt = (1<<31) - 1;

Interview Problems 25

1. Given a set of KEY->VALUE pairs such that each KEY is unique, describe a method of storing these pairs on disk, and a method for accessing the corresponding VALUE given a KEY. Assume that RAM is fixed at 1gb and the set of pairs requires 40gb.

2. Consider there is an array with duplicates and you are given two numbers as input and you have to return the minimum distance between the two in the array with minimum complexity.

3. You are given an array of integers, say array1 of size n. You have to create another array (say array2) and fill its contents by following this rule: array2[i] will hold the value of the left-most integer from the range array1[i+1] to array1[n-1] such that it is greater than array1[i]. If no such element exists, assign -1.

4. An external system has an array. You are given 3 inbuilt functions which perform their operation in O(1) time.
a) get(i): returns ith element of array
b) length(): returns length of array
c) reverse(i, j): reverses elements of subarray starting from i and ending with j (both inclusive)

Write function SortArray() using these three functions.

5. Given a binary tree, find 2 leaf nodes say X and Y such that F(X,Y) is maximum where F(X,Y) = sum of nodes in the path from root to X + sum of nodes in the path from root to Y – sum of nodes in the common path from root to first common ancestor of the Nodes X and Y.

6. Given an array, A, find if all elements in the sorted version of A are consecutive in less than O(nlogn).
eg: A: 5 4 1 2 3 on sorting 1 2 3 4 5 all elements are consecutive — true
A: 1 9 2 22 on sorting 1 2 9 22 all elements are NOT consecutive — false

7. Given a string (assume there no spaces or punctuations), write a code that returns the max. length of the string that has repeated more than once.

Interview Problems 24

1. Given a linked list structure where every node represents a linked list and contains two pointers of its type:
(i) pointer to next node in the main list.
(ii) pointer to a linked list where this node is head.
Write a C function to flatten the list into a single linked list. (from here)

2. Hundred of millions of irregular shape objects are moving in random direction. Provide data structure and algo to detect collision. (from here)

3. You are given an array of integer A[1..n] and a maximum sliding window of size w. you need to output an array B where B[i] is the maximum in A[i, i+w-1]. (from here)

4. [Design] Find duplicate profiles in Facebook.

5. You have an array of 0s and 1s and you want to output all the intervals (i, j) where the number of 0s and numbers of 1s are equal. (from here)

6. You have a stream of random numbers that are inserted into an expanding array. How can you maintain the current median and what is the complexity.

7. Given n arrays, find n number such that sum of their differences is minimum. For e.g. if there are three arrays

A = {4, 10, 15, 20}
B = {1, 13, 29}
C = {5, 14, 28}

find three numbers a, b, c such that |a-b| + |b-c| + |c-a| is minimum. Here the answer is a = 15, b = 13, and c = 14

8. [Coding] Write a C/C++ program to create a bitmap of any size as determined by user. Say user says 64k bitmap, then create 64k long bitmap. Have set and unset methods.

9. Reorder 1st string based on 2nd string.
eg: (tractor,car)
output: carrtto or carrott or carrtot.

10. There is a straight roads with ‘n’ number of milestones. You are given an array with the distance between all the pairs of milestones in some random order. Find the position of milestones.
Example:
Consider a road with 4 milestones(a,b,c,d) :
a bcd
Distance between a and b = 3
Distance between a and c = 8
Distance between a and d = 10
Distance between b and c = 5
Distance between b and d = 7
Distance between c and d = 2
All the above values are given in a random order say 7, 10, 5, 2, 8, 3.
The output must be 3,5,2 or 2,5,3

Change

Never thought people (me for that matter) change so quickly. Not long ago I posted here about how I don’t enjoy computers much but just barely make up with them. Yesterday I had to travel to my parents and I was shocked at how badly I was bored when I was not coding. I had to stop listening to music and open a terminal and start coding. I guess when one does something everyday, it becomes difficult to hate it (arranged marriages? :P). Wanted to start on a task which I could complete so wrote the complete code for reversing all the words in a given string (ya ya … I know .. I know its too easy :D).
Re-wrote all the library functions, have to confirm their behaviour though :P. Do let me know if there are any obvious/major bugs.

#include <stdio.h>
#include <stdlib.h>

#define _DEBUG_HAULT_ scanf("%*d");
#define STR_TERM '\0'
#define MAX_INPUT_LENGTH 1000
#define DELIMITERS " ,.':;"

int my_strlen(const char* str) {
  register int len = -1;
  for (;*(str + ++len););
  return len;
}

char* my_strchr(const char* s, int ch) {
  if (NULL == s) return NULL;
  for (; STR_TERM != *s && *s != ch; ++s);
  return STR_TERM == *s ? NULL : (char *)s;
}

char* my_strrev(const char* str, register int l) {
  register int i = -1;
  char *retVal = (char *)malloc(sizeof(char) * l);
  retVal[0] = retVal[l] = STR_TERM;
  for (; l; retVal[++i] = str[--l]);
  return retVal;
}

char* my_strtok(char *buf, const char* delim) {
  static char* record;
  record = (buf == NULL) ? record : buf;
  if (*record == STR_TERM || NULL == record) return NULL;
  char* temp = record;
  while ((STR_TERM != *record) && (NULL == my_strchr(delim, *record))) {
    ++record;
  }
  if (STR_TERM == *record) return temp;
  while ((STR_TERM != *record) && (NULL != my_strchr(delim, *record))) {
    *record = STR_TERM;
    ++record;
  }
  return temp;
}

// Removes all the punctuations (marked in constant DELIMITERS) and inserts
// only 1 space between words.
int main(int argc, char *argv[]) {
  char input[MAX_INPUT_LENGTH], output[MAX_INPUT_LENGTH];
  input[0] = output[0] = STR_TERM;
  scanf("%[^\n]", input);
  register int len = 0, temp = 0;
  register char* ptr = NULL;
  for (ptr = my_strtok(input, DELIMITERS);
       ptr;
       ptr = my_strtok(NULL, DELIMITERS)) {
    temp = my_strlen(ptr);
    sprintf(output + len, "%s ", my_strrev(ptr, temp));
    len += (temp + 1);
  }
  output[len - 1] = STR_TERM;
  printf("%s\n", output);
  printf("%s\n", my_strrev(output, my_strlen(output)));
  return 0;
}

After I was done, I found out about strsep 😛

%d bloggers like this: