//============================================================================ // Name : KMP.cpp // Author : Varun Sharma // Complexity : O(P) pre-processing O(T) kmpMatching, O(P+T). // Copyright : // Description : Knuth-Morris-Pratt pattern matching Algo. //============================================================================ /* PRE-PROCESSING ============== Construct an array kmp[] that stores the no. of positions by which one should move the pattern in case of mismatch at character i. Kmp[i] = Length of longest prefix in pattern[0…i] which is also suffix of pattern[0…i]. Pattern : AABAAC Pattern[0..4] = AABAA. Longest prefix and suffix in Patern[0…4] = AA hence Kmp[4] = 2 String A A B A A C KMP 0 1 0 1 2 0 1. Create an array kmp[] and set kmp[0] = 0 and set len =0. 2. For i=1 to patternLength. len will keep track of prefix, while i will scan entire string from left to right. 3. Check if char at i matches char at len, pattern[i] == pattern[len] a. If match occurs increment len (len++) and set kmp[i] = len and increment I (i++). b. If no match, then i. If len = 0, i.e. at beginning of pattern, then set kmp[i] = 0 and increment i ii. Else shift pattern by kmp[len-1], do not increment I and repeat step 3. KMP MATCHING ============ 1. Preprocessing : Build the preprocessing Array. 2. Use t for scanning Text and p for scanning pattern, t=p=0. 3. If text[t] == patter[p] then increment p and t (p++, t++) a. If p == pattern length, then pattern occurs in string and print. 4. Else There is a mismatch. a. If p==0 i.e. at first character in pattern, then increment t and start matching from first character in pattern. b. Else use pre-processing array to determine matching position (p) set p = kmp[p-1]. */ #include <iostream> using namespace std; void preprocessing(int *kmp, string pattern, int pLen) { kmp[0] = 0; int len,i; for(i=1,len=0; i<pLen; ) { //Check if char at i matches char at len. if(pattern[i] == pattern[len]) { //If match occurs increment len and set kmp[i] = len. len++; kmp[i] = len; i++; } else { //If characters don't match and len = 0, i.e. at very first character. //Then set kmp[i] = 0 and simply increment i. if(!len) { kmp[i] = 0; i++;} else //else repeat matching step by shifting pattern by kmp[len-1]. len = kmp[len-1]; } } } void kmpMatching(string text , string pattern) { int tLen, pLen , t, p; tLen = text.length(); pLen = pattern.length(); //if length of text is greater than length of pattern then return. if(tLen < pLen) { cout<<"Pattern Doesn't exist"; return; } //Build kmp array as part of preprocessing step. int *kmp = new int[pLen]; preprocessing(kmp,pattern,pattern.length()); //p is use to traverse in pattern. //t is use to traverse in text. for(t = p =0; t<tLen; ) { //If character at p in pattern //and character at t in text //match then increment p and t. if(text[t] == pattern[p]) { t++; p++; //if p = pattern length then pattern occurs. if(p==pLen) { cout<<"n Pattern Occurs at : "<<t-pLen+1<<" position"; p = 0; } }else { //if p = 0, i.e. very first character in pattern. //Then increment text and start matching from pattern[p]. if(!p) t++; //else if p!=-0 then set p = kmp[p-1] and start matchng from pattern[p]. else p = kmp[p-1]; } } } int main() { string text = "I google about google."; string pattern = "google"; kmpMatching(text,pattern); return 0; }
Monday, 14 October 2013
Knuth-Morris-Pratt Pattern Matching Algorithm
Subscribe to:
Post Comments (Atom)
Reference: http://www.youtube.com/watch?v=EEjNb9yUv1k
ReplyDelete