Given two strings a and b, find whether any anagram of string a is a sub-string of string b

I know this is a fairly (old and) simple question. But what people do not realize is that it has a catch! I saw this question here and browsed through most of the solutions. Almost all of them (except the crazy ones where folks wanted to do brutal force search) would fail on a very simple case. And this question, is all about just that case

#include<iostream>
#include<string>
#include<vector>
using namespace std;
int main() {
  string src, tgt;
  std::getline(cin, src);
  std::getline(cin, tgt);
  vector<int> buf(256, 0);
  const int tot = src.size();
  for (int i = 0; i < src.size(); ++i) {
    buf[src[i]]++;
  }
  vector<int> tmp(256, 0);
  int start = -1, tmp_tot = 0, end = 0;
  for (int i = 0; i < tgt.size(); ++i) {
    const unsigned char ch = tgt.at(i);
    if (buf[ch] > 0) {
      if (tmp[ch] == buf[ch]) {
        if (tmp_tot > 256) {
          tmp.clear();
          tmp.resize(256, 0);
        } else {
          while (tmp_tot > 0) tmp[tgt[i - tmp_tot--]]--;
        }
        start = i;
        tmp[ch]++;
        tmp_tot = 1;
      } else {
        if (start == -1) start = i;
        tmp[ch]++;
        ++tmp_tot;
        if (tmp_tot == tot) end = i;
      }
    }
  }
  if (end > 0) {
    cout << start << "    " << end << std::endl;
  } else {
    cout << "Not found\n";
  }
  return 0;
}
Advertisements

Knuth Morrison Pratt (KMP) String Matching Algorithm

I have always wanted to understand the KMP Algorithm and write a program for it and finally I got around doing it. I had this big misconception that its a very tedious algorithm and does some black magic but, as with all other Knuth’s algorithms, this is a beauty 🙂

Following is a working 😀 program, which does not handle corner cases, empty strings and UTF-8 etc. But works for sane inputs.

#include <iostream>
#include <string>
#include <cstring>
#include <vector>
using namespace std;

vector<int> BuildLookupTable(const string& needle) {
  vector<int> ret_val;
  ret_val.resize(needle.length());
  size_t pos = 2, prefix_pos = 0;
  ret_val[0] = -1;
  ret_val[1] = 0;
  while (pos < needle.length()) {
    if (needle.at(pos - 1) == needle.at(prefix_pos)) {
      ret_val[pos++] = ++prefix_pos;
    } else if (prefix_pos > 0) {
      prefix_pos = ret_val[prefix_pos];
    } else {
      ret_val[pos++] = 0;
    }
  }
  return ret_val;
}

string FindNeedleInHaystackUsingKMP(const string& needle,
                                    const string& haystack) {
  vector<int> lookup_table = BuildLookupTable(needle);
  size_t haystack_index = 0;
  size_t needle_index = 0;
  while (haystack_index + needle_index < haystack.length()) {
    if (haystack.at(haystack_index + needle_index)
        == needle.at(needle_index)) {
      ++needle_index;
      if (needle_index == needle.length()) return "Found.";
    } else {
      haystack_index = haystack_index + needle_index
          - lookup_table[needle_index];
      if (lookup_table[needle_index] == -1) {
        needle_index = 0;
      } else {
        needle_index = lookup_table[needle_index];
      }
    }
  }
  return "Not Found.";
}

int main() {
  string needle, haystack;
  cout << "Enter the needle: ";
  getline(std::cin, needle);
  cout << "Enter the haystack: ";
  getline(std::cin, haystack);
  cout << FindNeedleInHaystackUsingKMP(needle, haystack) << "\n";
  char* ret_val = strstr(haystack.c_str(), needle.c_str());
  if (ret_val == NULL) {
    cout << "(Verifier) : Not Found\n";
  } else {
    cout << "(Verifier) : Found\n";
  }
}

Change

Never thought people (me for that matter) change so quickly. Not long ago I posted here about how I don’t enjoy computers much but just barely make up with them. Yesterday I had to travel to my parents and I was shocked at how badly I was bored when I was not coding. I had to stop listening to music and open a terminal and start coding. I guess when one does something everyday, it becomes difficult to hate it (arranged marriages? :P). Wanted to start on a task which I could complete so wrote the complete code for reversing all the words in a given string (ya ya … I know .. I know its too easy :D).
Re-wrote all the library functions, have to confirm their behaviour though :P. Do let me know if there are any obvious/major bugs.

#include <stdio.h>
#include <stdlib.h>

#define _DEBUG_HAULT_ scanf("%*d");
#define STR_TERM '\0'
#define MAX_INPUT_LENGTH 1000
#define DELIMITERS " ,.':;"

int my_strlen(const char* str) {
  register int len = -1;
  for (;*(str + ++len););
  return len;
}

char* my_strchr(const char* s, int ch) {
  if (NULL == s) return NULL;
  for (; STR_TERM != *s && *s != ch; ++s);
  return STR_TERM == *s ? NULL : (char *)s;
}

char* my_strrev(const char* str, register int l) {
  register int i = -1;
  char *retVal = (char *)malloc(sizeof(char) * l);
  retVal[0] = retVal[l] = STR_TERM;
  for (; l; retVal[++i] = str[--l]);
  return retVal;
}

char* my_strtok(char *buf, const char* delim) {
  static char* record;
  record = (buf == NULL) ? record : buf;
  if (*record == STR_TERM || NULL == record) return NULL;
  char* temp = record;
  while ((STR_TERM != *record) && (NULL == my_strchr(delim, *record))) {
    ++record;
  }
  if (STR_TERM == *record) return temp;
  while ((STR_TERM != *record) && (NULL != my_strchr(delim, *record))) {
    *record = STR_TERM;
    ++record;
  }
  return temp;
}

// Removes all the punctuations (marked in constant DELIMITERS) and inserts
// only 1 space between words.
int main(int argc, char *argv[]) {
  char input[MAX_INPUT_LENGTH], output[MAX_INPUT_LENGTH];
  input[0] = output[0] = STR_TERM;
  scanf("%[^\n]", input);
  register int len = 0, temp = 0;
  register char* ptr = NULL;
  for (ptr = my_strtok(input, DELIMITERS);
       ptr;
       ptr = my_strtok(NULL, DELIMITERS)) {
    temp = my_strlen(ptr);
    sprintf(output + len, "%s ", my_strrev(ptr, temp));
    len += (temp + 1);
  }
  output[len - 1] = STR_TERM;
  printf("%s\n", output);
  printf("%s\n", my_strrev(output, my_strlen(output)));
  return 0;
}

After I was done, I found out about strsep 😛

Combination of all characters of a string

A very common questions and a fairly interesting exercise in programming is to develop and implement an algorithm which prints all the combinations of characters in a string, assuming that all the characters are distinct. eg. for input avi, the output should be a,v,i,av,ai,vi,avi.

Almost all of us are aware of the recursive algorithm for this problem and its implementation is also straight forward.
The recursive code which does the job.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void mycomb_ultrabad(char const * const s, int l, int *arr, int index, int start, int d)
{
	int i,j;
	if (index >= d)
		return ;
	for (j = start ; j < l ; j++)
	{
		arr[index] = j;
		if (index == d-1)
		{
			for (i=0;i<d;i++)putchar(s[arr[i]]);
			putchar('\n');
		}
		else
			mycomb_ultrabad(s,l,arr,index+1,j+1,d);
	}
	return ;
}


int main()
{
	int i,d;
	char s[100];
	scanf("%s",s);
	d = strlen(s);
	int arr[d+1];
	for (i=0;i<d;i++)
	{
		mycomb_ultrabad(s, d, arr,0,0,d-i);
	}
	return 0;
}

Also, all of us are very aware of the highly recursive nature of this code and hence the intriguing time it takes to complete even for a very small input. Knuth has given a very efficient algorithm in his epic TAOCP which does the job at a very high speed.
The (following) code which implements his algorithm and is very fast indeed.

#include<stdio.h>
#include<string.h>
#include<stdlib.h>


void string_combinations(char * s, int d) {
	int n, j, x, *c;

	if (d <= 0) { puts(s); putchar('\n'); return; }
	if (d >= (n = strlen(s))) return;
	c = (int *)malloc((n + 2 - d)*sizeof(int));

	for(j = 0; j < n - d; ++j) c[j] = j;
	c[j + 1] = 0; x = n;
	do {
		c[j--] = x;
		for(x = 0; c[x] < n; ++x) { putchar(s[c[x]]); } putchar('\n');

		if (j >= 0) { x = j + 1; continue; }
		for(x = c[j = 0] + 1; x == c[j + 1]; x = c[++j] + 1) c[j] = j;
	} while (j < n - d);

	free(c);
}

int main()
{
	int i,d;
	char s[100];
	scanf("%s",s);
	d = strlen(s);
	int arr[d+1];
	for (i=0;i<d;i++)
	{
		string_combinations(s,d-i);
	}
	return 0;
}

When I was in first year at my college, and was introduced to programming, I was given this problem. I had no idea about recursion and the concept of algorithms because it was the first time I was studying computers. So, after some brain-storming I developed a method to solve the problem which did not use recursion (obviously because I didn’t even know what recursion was :P). The code which I had made then worked very fine and was very fast indeed. A few days back I was browsing through the codes I had made and I saw through that code. Well, keeping the algorithm same I implemented the code again, (now being a much better programmer than was then :D) and to my surprise my code beats the Knuth’s implementation by a considerable margin for all inputs. 😀 The following is the code which I recently made. Can you guess the algorithm ??? It was motivated from a very common real life analogy. Try to figure out how it works 🙂

#include<stdio.h>
#include<string.h>
#include<stdlib.h>


void mycomb_optimized(char const * const s, int d)
{
	d++;
	int l = strlen(s);
	int arr[d], j, k;
	for (j=0; j < d; j++) arr[j]=j;
	while ( j >=0  )
	{
		for (j=arr[d-1];j < l ; )
		{
			// print the existing string
			for (k=0;k<d;k++) putchar(s[arr[k]]);putchar('\n');
			arr[d-1] = ++j;
		}
		j = d - 2;k = 2;
		while (j >= 0)
		{
			if (arr[j] + 1 <= l - k )
			{
				arr[j]++;j++;
				break;
			}
			j--;k++;
		}
		while (j >= 0 && j <= d - 1 ) { 
			arr[j]=arr[j-1]+1;
			j++;
		}
	}
	return ;
}


int main()
{
	int i,d;
	char s[100];
	scanf("%s",s);
	d = strlen(s);
	int arr[d+1];
	for (i=0;i<d;i++)
	{
		mycomb_optimized(s, i);
	}
	return 0;
}

I still can’t believe how I could come up with such a nice method to solve the problem..

%d bloggers like this: