Monday 14 October 2013

Knuth-Morris-Pratt Pattern Matching Algorithm

//============================================================================
// Name        : KMP.cpp
// Author      : Varun Sharma
// Complexity  : O(P) pre-processing O(T) kmpMatching, O(P+T).
// Copyright   :
// Description : Knuth-Morris-Pratt pattern matching Algo.
//============================================================================
/*
PRE-PROCESSING
==============
Construct an array kmp[] that stores the no. of positions by which one should move the pattern in case of mismatch at character i.
Kmp[i] = Length of longest prefix in pattern[0…i] which is also suffix of pattern[0…i].
Pattern : AABAAC
Pattern[0..4] = AABAA.
Longest prefix and suffix in Patern[0…4] = AA hence Kmp[4] = 2  
String A      A      B      A      A      C
KMP    0      1      0      1      2      0

1.    Create an array kmp[] and set kmp[0] = 0 and set len =0.
2.    For i=1 to patternLength.
      len will keep track of prefix, while i will scan entire string from left to right.
3.    Check if char at i matches char at len, pattern[i] == pattern[len]
      a. If match occurs increment len (len++) and set kmp[i] = len and increment I (i++).
      b. If no match, then
            i.      If len = 0, i.e. at beginning of pattern, then set kmp[i] = 0 and increment i
            ii.     Else shift pattern by kmp[len-1], do not increment I  and repeat step 3.
 
KMP MATCHING
============
1.      Preprocessing : Build the preprocessing Array.
2.      Use t for scanning Text and p for scanning pattern, t=p=0.
3.      If text[t] == patter[p] then increment p and t (p++, t++)
        a. If p == pattern length, then pattern occurs in string and print.
4.      Else There is a mismatch.
        a. If p==0 i.e. at first character in pattern, then increment t and start matching from first character in pattern.
        b. Else use pre-processing array to determine matching position (p)
           set p = kmp[p-1].
*/
#include <iostream>
using namespace std;

void preprocessing(int *kmp, string pattern, int pLen) {
      kmp[0] = 0;
      int len,i;
      for(i=1,len=0; i<pLen; ) {
            //Check if char at i matches char at len.
            if(pattern[i] == pattern[len]) {
                  //If match occurs increment len and set kmp[i] = len.
                  len++;
                  kmp[i] = len;
                  i++;
            }
            else {
                  //If characters don't match and len = 0, i.e. at very first character.
                  //Then set kmp[i] = 0  and simply increment i.
                  if(!len) { kmp[i] = 0;      i++;}
                  else
                  //else repeat matching step by shifting pattern by kmp[len-1].
                        len = kmp[len-1];
            }
      }
}

void kmpMatching(string text , string pattern) {
      int tLen, pLen , t, p;
      tLen = text.length();
      pLen = pattern.length();

      //if length of text is greater than length of pattern then return.
      if(tLen < pLen) {
            cout<<"Pattern Doesn't exist";
            return;
      }

      //Build kmp array as part of preprocessing step.
      int *kmp = new int[pLen];
      preprocessing(kmp,pattern,pattern.length());

      //p is use to traverse in pattern.
      //t is use to traverse in text.
      for(t = p =0; t<tLen; ) {
            //If character at p in pattern
            //and character at t in text
            //match then increment p and t.
            if(text[t] == pattern[p]) {
                  t++; p++;
                  //if p = pattern length then pattern occurs.
                  if(p==pLen) {
                        cout<<"n Pattern Occurs at : "<<t-pLen+1<<" position";
                        p = 0;
                  }
            }else {
                  //if p = 0, i.e. very first character in pattern.
                  //Then increment text and start matching from pattern[p].
                  if(!p) t++;
                  //else if p!=-0 then set p = kmp[p-1] and start matchng from pattern[p].
                  else p = kmp[p-1];
            }
      }
}

int main() {
      string text = "I google about google.";
      string pattern = "google";
      kmpMatching(text,pattern);
      return 0;
}



1 comment:

  1. Reference: http://www.youtube.com/watch?v=EEjNb9yUv1k

    ReplyDelete