//============================================================================
// Name : KMP.cpp
// Author : Varun Sharma
// Complexity : O(P) pre-processing O(T) kmpMatching, O(P+T).
// Copyright :
// Description : Knuth-Morris-Pratt pattern matching Algo.
//============================================================================
/*
PRE-PROCESSING
==============
Construct an array kmp[] that stores the no. of positions by which one should move the pattern in case of mismatch at character i.
Kmp[i] = Length of longest prefix in pattern[0…i] which is also suffix of pattern[0…i].
Pattern : AABAAC
Pattern[0..4] = AABAA.
Longest prefix and suffix in Patern[0…4] = AA hence Kmp[4] = 2
String A A B A A C
KMP 0 1 0 1 2 0
1. Create an array kmp[] and set kmp[0] = 0 and set len =0.
2. For i=1 to patternLength.
len will keep track of prefix, while i will scan entire string from left to right.
3. Check if char at i matches char at len, pattern[i] == pattern[len]
a. If match occurs increment len (len++) and set kmp[i] = len and increment I (i++).
b. If no match, then
i. If len = 0, i.e. at beginning of pattern, then set kmp[i] = 0 and increment i
ii. Else shift pattern by kmp[len-1], do not increment I and repeat step 3.
KMP MATCHING
============
1. Preprocessing : Build the preprocessing Array.
2. Use t for scanning Text and p for scanning pattern, t=p=0.
3. If text[t] == patter[p] then increment p and t (p++, t++)
a. If p == pattern length, then pattern occurs in string and print.
4. Else There is a mismatch.
a. If p==0 i.e. at first character in pattern, then increment t and start matching from first character in pattern.
b. Else use pre-processing array to determine matching position (p)
set p = kmp[p-1].
*/
#include <iostream>
using namespace std;
void preprocessing(int *kmp, string pattern, int pLen) {
kmp[0] = 0;
int len,i;
for(i=1,len=0; i<pLen; ) {
//Check if char at i matches char at len.
if(pattern[i] == pattern[len]) {
//If match occurs increment len and set kmp[i] = len.
len++;
kmp[i] = len;
i++;
}
else {
//If characters don't match and len = 0, i.e. at very first character.
//Then set kmp[i] = 0 and simply increment i.
if(!len) { kmp[i] = 0; i++;}
else
//else repeat matching step by shifting pattern by kmp[len-1].
len = kmp[len-1];
}
}
}
void kmpMatching(string text , string pattern) {
int tLen, pLen , t, p;
tLen = text.length();
pLen = pattern.length();
//if length of text is greater than length of pattern then return.
if(tLen < pLen) {
cout<<"Pattern Doesn't exist";
return;
}
//Build kmp array as part of preprocessing step.
int *kmp = new int[pLen];
preprocessing(kmp,pattern,pattern.length());
//p is use to traverse in pattern.
//t is use to traverse in text.
for(t = p =0; t<tLen; ) {
//If character at p in pattern
//and character at t in text
//match then increment p and t.
if(text[t] == pattern[p]) {
t++; p++;
//if p = pattern length then pattern occurs.
if(p==pLen) {
cout<<"n Pattern Occurs at : "<<t-pLen+1<<" position";
p = 0;
}
}else {
//if p = 0, i.e. very first character in pattern.
//Then increment text and start matching from pattern[p].
if(!p) t++;
//else if p!=-0 then set p = kmp[p-1] and start matchng from pattern[p].
else p = kmp[p-1];
}
}
}
int main() {
string text = "I google about google.";
string pattern = "google";
kmpMatching(text,pattern);
return 0;
}
Monday, 14 October 2013
Knuth-Morris-Pratt Pattern Matching Algorithm
Subscribe to:
Post Comments (Atom)
Reference: http://www.youtube.com/watch?v=EEjNb9yUv1k
ReplyDelete