Monday 14 October 2013

Quick select: Select Kth smallest element in an array of size n in O(n) time.


#include <iostream>
using namespace std;

inline void swap(int arr[],int i,int j) {
      int temp = arr[i];
      arr[i] = arr[j];
      arr[j] = temp;
}

int randint(int len) {
      srand(time(NULL));
    //srand((unsigned)rdtsc());
    return (rand() % (len) + 0);
}

int randomPartition(int arr[],int start , int end) {
      int pivotPos = randint(end-start) + start;
      swap(arr,start,pivotPos);
      
      int pivot = arr[start], i = start+1, j= start+1;
      pivotPos = start;
      for(;j<=end; j++) {
            if(arr[j] < pivot) {
                  swap(arr,i,j);
                  i++;
            }
      }
      swap(arr,pivotPos,i-1);
      pivotPos = i-1;
      return pivotPos;
}

int Rselect(int arr[],int start, int end, int k) {
    int pivotPos = randomPartition(arr,start,end);
    if(pivotPos == k) 
        return arr[k];
    else if(pivotPos < k)
        return Rselect(arr,pivotPos+1,end,k);
    else
        return Rselect(arr,start,pivotPos-1,k);
}

int main() {
    int arr[] = {40,1,10,70,2,5,30,9,8,6};
    int size = sizeof arr/sizeof arr[0];
    cout<<"Kth greatest = "<<Rselect(arr,0,size-1,3);
    system("pause");
    return 0;
}



Knuth-Morris-Pratt Pattern Matching Algorithm

//============================================================================
// Name        : KMP.cpp
// Author      : Varun Sharma
// Complexity  : O(P) pre-processing O(T) kmpMatching, O(P+T).
// Copyright   :
// Description : Knuth-Morris-Pratt pattern matching Algo.
//============================================================================
/*
PRE-PROCESSING
==============
Construct an array kmp[] that stores the no. of positions by which one should move the pattern in case of mismatch at character i.
Kmp[i] = Length of longest prefix in pattern[0…i] which is also suffix of pattern[0…i].
Pattern : AABAAC
Pattern[0..4] = AABAA.
Longest prefix and suffix in Patern[0…4] = AA hence Kmp[4] = 2  
String A      A      B      A      A      C
KMP    0      1      0      1      2      0

1.    Create an array kmp[] and set kmp[0] = 0 and set len =0.
2.    For i=1 to patternLength.
      len will keep track of prefix, while i will scan entire string from left to right.
3.    Check if char at i matches char at len, pattern[i] == pattern[len]
      a. If match occurs increment len (len++) and set kmp[i] = len and increment I (i++).
      b. If no match, then
            i.      If len = 0, i.e. at beginning of pattern, then set kmp[i] = 0 and increment i
            ii.     Else shift pattern by kmp[len-1], do not increment I  and repeat step 3.
 
KMP MATCHING
============
1.      Preprocessing : Build the preprocessing Array.
2.      Use t for scanning Text and p for scanning pattern, t=p=0.
3.      If text[t] == patter[p] then increment p and t (p++, t++)
        a. If p == pattern length, then pattern occurs in string and print.
4.      Else There is a mismatch.
        a. If p==0 i.e. at first character in pattern, then increment t and start matching from first character in pattern.
        b. Else use pre-processing array to determine matching position (p)
           set p = kmp[p-1].
*/
#include <iostream>
using namespace std;

void preprocessing(int *kmp, string pattern, int pLen) {
      kmp[0] = 0;
      int len,i;
      for(i=1,len=0; i<pLen; ) {
            //Check if char at i matches char at len.
            if(pattern[i] == pattern[len]) {
                  //If match occurs increment len and set kmp[i] = len.
                  len++;
                  kmp[i] = len;
                  i++;
            }
            else {
                  //If characters don't match and len = 0, i.e. at very first character.
                  //Then set kmp[i] = 0  and simply increment i.
                  if(!len) { kmp[i] = 0;      i++;}
                  else
                  //else repeat matching step by shifting pattern by kmp[len-1].
                        len = kmp[len-1];
            }
      }
}

void kmpMatching(string text , string pattern) {
      int tLen, pLen , t, p;
      tLen = text.length();
      pLen = pattern.length();

      //if length of text is greater than length of pattern then return.
      if(tLen < pLen) {
            cout<<"Pattern Doesn't exist";
            return;
      }

      //Build kmp array as part of preprocessing step.
      int *kmp = new int[pLen];
      preprocessing(kmp,pattern,pattern.length());

      //p is use to traverse in pattern.
      //t is use to traverse in text.
      for(t = p =0; t<tLen; ) {
            //If character at p in pattern
            //and character at t in text
            //match then increment p and t.
            if(text[t] == pattern[p]) {
                  t++; p++;
                  //if p = pattern length then pattern occurs.
                  if(p==pLen) {
                        cout<<"n Pattern Occurs at : "<<t-pLen+1<<" position";
                        p = 0;
                  }
            }else {
                  //if p = 0, i.e. very first character in pattern.
                  //Then increment text and start matching from pattern[p].
                  if(!p) t++;
                  //else if p!=-0 then set p = kmp[p-1] and start matchng from pattern[p].
                  else p = kmp[p-1];
            }
      }
}

int main() {
      string text = "I google about google.";
      string pattern = "google";
      kmpMatching(text,pattern);
      return 0;
}



Sunday 6 October 2013

Implementation of Bellman Ford Algorithm.


/*
ALGORITHM (Borrowed from wiki)
==============================
procedure BellmanFord(list vertices, list edges, vertex source)
   // This implementation takes in a graph, represented as lists of vertices and edges,
   // and fills two arrays (distance and predecessor) with shortest-path information

   // Step 1: initialize graph
   for each vertex v in vertices:
       if v is source then distance[v] := 0
       else distance[v] := infinity
       predecessor[v] := null

   // Step 2: relax edges repeatedly
   for i from 1 to size(vertices)-1:
       for each edge (u, v) with weight w in edges:
           if distance[u] + w < distance[v]:
               distance[v] := distance[u] + w
               predecessor[v] := u

   // Step 3: check for negative-weight cycles
   for each edge (u, v) with weight w in edges:
       if distance[u] + w < distance[v]:
           error "Graph contains a negative-weight cycle"


BELLMAN FORD VS DIJKSTRA
========================
1.      Bellman Ford runs in O(VE) time where as Dijkstra can be optimized to run in O(VlogV) time with use of Fibonacci heap.
2.      Bellman ford works on negative weight graphs as well where as Dijkstra doesn’t allow negative weight edges.
3.      Dijkstra algorithm is highly centric and is not very distributive. Dijkstra is good if all data is stored on one machine’s main memory.

COMPLEXITY
==========
O(VE)
*/

public void bellmanFord(int sourceVertex) {
      int i,j,cost;
      Vertex source,target;
      boolean negativeCycle = false;
      int distance[] = new int[totalVertices];

      // Step 1: initialize graph
      //Set distance of each vertex from source to INF and distance source = 0
      for(i=0; i<totalVertices; i++)
            distance[i] = INF;
      distance[sourceVertex] = 0;
      
      // Step 2: relax edges repeatedly
      //Relax each edge by 1,2...|V|-1 
      //Complexity : O(|V|).
      //Longest cycle free path between 2 vertices can have atmost |v|-1 edges.
      for(i=0; i<totalVertices-1; i++) {            
            //Explore each edge. Complexity : O(|E|) 
            for(j=0; j<totalVertices; j++) {
                  source = vertices[j];
                  for(Edge e : source.edges) {                  
                        target = e.Target;
                        cost = distance[source.vertexId] + e.weight;
                        
                  //If current distance is greater than new reachable distance.
                  //then update distance.
                        if(distance[target.vertexId] > cost) {
                              distance[target.vertexId] =  cost;
                        }
                  }
            }
      }
      // Step 3: check for negative-weight cycles
      //Detect negative weight cycles by relaxing to |V| edges.
      //If there is a negative cycle in the path then simply report it.
      for(j=0; j<totalVertices; j++) {
            source = vertices[j];
            for(Edge e : source.edges) {
                  target = e.Target;
                  cost = distance[source.vertexId] + e.weight;
                  if(distance[target.vertexId] > cost) {
                        negativeCycle = true;
                        System.out.println("NEGATIVE WEIGHT CYCLE IN GRAPH");
                        break;
                  }
            }
      }
}



Implementation of Dijkstra, single source shortest path.

/*
ALGORITHM
=========
1.      Initialize distance of each vertex from source as INF, and distance back to source = 0.
2.      For I = 0 to Total vertices Repeat step 3 to 4.
3.      Find minimum distance vertex, which is not yet explored and put it to explored set.
4.      Find all vertices reachable from current vertex and update its distance if it is less than distance[currentVertex] + edge weight.

COMPLEXITY
==========
Time  : O(|V||E|) .

Time complexity can be improved by using Fibonacci heap to retrieve minimum value.
Time : O(|E| + |V|log|V|).

LIMITATIONS
===========
•      Dijkstra algorithm can be used for graphs with all positive weight edges only. 
•      Dijkstra is not lacks distribute property.
*/
 
public int minimum(int distance[],boolean visited[]) {
      int min = INF,minNode = -1;
      int i;
      for(i=0; i<totalVertices; i++) {
            if(distance[i] <= min && visited[i] == false) {
                  min = distance[i];
                  minNode = i;
            }
      }
      return minNode;
}

public void dijkstra(int sourceVertex) {
      System.out.println("nnDIJKSTRA");
      int i,j,minV,cost;
      Vertex minVertex,target;
      
      //Distance array to store distance of each vertex from sourcevertex
      int distance[] = new int[totalVertices];
      boolean visited[] = new boolean[totalVertices];
      
      //Initialize distance array with INF, and distance[source] = 0.
      for(i=0; i<totalVertices; i++) {
            visited[i] = false;
            distance[i] = INF;
      }
      distance[sourceVertex] = 0;
      
      for(i=0; i<totalVertices; i++) {
            //Get minimum vertex out of all unexplored vertex 
//and put it to explored set.
            minV = minimum(distance,visited);
            visited[minV] = true;
            if(distance[minV] == INF)
                  continue;
            minVertex = vertices[minV];

            //compute the distance, if it is less than current distance then update it.
            for(j=0; j<minVertex.edges.size(); j++) {
                  cost = distance[minV] + minVertex.edges.get(j).weight;
                  target = minVertex.edges.get(j).Target;
                  if(cost < distance[target.vertexId]) {
                        distance[target.vertexId] =  cost;
                  }
            }
      }
            
}
      



Topological Sort.


/*
     PRE-CONDITION
     =============
     Graph should be Acyclic and Directed (DAG).

     APPLICATIONS
     ============
•     Ordering of formula cell evaluation when recomputing formula values in spreadsheets,
•     Logic synthesis.
•      Determining the order of compilation tasks to perform in makefiles.
•     Data serialization
•     Resolving symbol dependencies in linkers. 
•     It is also used to decide in which order to load tables with foreign keys in databases.

     ALGORITHM
     =========
1.     Put all the vertices in unexplored set.
2.     While unexplored set is not empty, Repeat step 3 to 5
3.     Select a vertex from unexplored set and perform DFS taking this as source,
4.     Remove vertex in DFS path from unexplored set and put them in explored set.
5.     Print the last node in DFS exploration (sink node).
*/

public void printSinkNodeDFS(Vertex source, boolean visited[]) {
     //Move the vertex to explored set.
     visited[source.vertexId] = true;
     for(int i=0; i<source.edges.size(); i++) {
          
          //Get all the vertices reachable from current vertex and present in unexplored set.
          Vertex target = source.edges.get(i).Target;
          if(!visited[target.vertexId])
               printSinkNodeDFS(target, visited);
     }
     //When there are no more unexplored vertices reachable from current node.
     //Process the current node which is now the sink node.
     System.out.print("  " + source.vertexId);
}
     

public void topologicalSort() {
     System.out.println("nnTopological Sort");
     int i;
     
     //Initially each vertex is in unexplored set.
     boolean visited[] = new boolean[totalVertices];
     for(i=0; i<totalVertices; i++) 
          visited[i] = false;
     
     //Scan all vertices in unexplored set and perform DFS with first unscanned vertex as source.
     //Move vertices from unexplored to explored set.
     //Repeat until no vertex is present in unexplored set.
     for(i=0; i<totalVertices; i++) {
          if(!visited[i]) {
               printSinkNodeDFS(vertices[i],visited);
          }
     }
}



Friday 4 October 2013

Depth First Search


     /*
          TIME COMPLEXITY :
          O(|v| + |E|).
          
          APPLICATIONS OF DFS
          Finding strongly connected components in a graph (Kosaraju Algorithm).
          Topological Sorting.
          Solving Maze.
     */
     
     public void recursiveDFS(Vertex sourceVertex,boolean visited[]) {
          //Process the top vertex from stack and put it to explored set of vertices.
          visited[sourceVertex.vertexId] = true;
          System.out.println(sourceVertex.vertexId);
          
          for(int i=0; i<sourceVertex.edges.size(); i++) {
               //Push all not visited vertices reachable from process vertex 
               //one by one and aggressively search to deeper levels.
               //backtrack when no more path can be taken and 
               //process all other vertices reachable from processed vertex
               Vertex target = sourceVertex.edges.get(i).Target;
               if(!visited[target.vertexId]) {
                    recursiveDFS(target,visited);
               }
          }
     }

     public void DFS(int sourceVertex) {
          System.out.println("DFS Traversal");
          int i;
          
          //Initially put all vertices in unexplored set.
          boolean visited[] = new boolean[totalVertices];
          for(i=0; i<totalVertices; i++)
               visited[i] = false;
          
          //Push source vertex in stack.
          recursiveDFS(vertices[sourceVertex],visited);
     }


Breadth First Search in a Graph.



     /* 
        ALGORITHM
        Enqueue the root node
        Dequeue a node and examine it
           If the element sought is found in this node, quit the search and return a result.
           Otherwise enqueue any successors (the direct child nodes) that have not yet been discovered.
        If the queue is empty, every node on the graph has been examined – quit the search and return "not found".
        If the queue is not empty, repeat from Step 2.
 
 COMPLEXITY
        BFS is very first and runs in linear time O(|V| + |E|) since each vertex and edge is explored once in worst case.
        
        APPLICATIONS
        Finding shortest path between 2 nodes in an undirected graph.
        Checking if a graph is bipartite
        Ford Fulkerson method for computing maximum flow in a network.
        Serialization/Deserialization of a binary tree.
        Finding all nodes within one connected components.
       
     */
     
     public void BFS(int sourceVertex) {
          System.out.println("BFS Traversal");
          int i,targetId;
          ArrayList<Vertex> queue = new ArrayList<>();
          boolean visited[] = new boolean[totalVertices];
          
          //Put all the vertices to unexplored set of vertices, initially.
          for(i=0; i<totalVertices; i++) 
               visited[i] = false;
          
          //Put source vertex to explored vertices set.
          visited[sourceVertex] = true;
          queue.add(vertices[sourceVertex]);
          while(!queue.isEmpty()) {
               
               //Remove the very first vertex from the queue.
               Vertex vertex = queue.get(0);
               queue.remove(0);
               
               //Process the removed vertex.
               System.out.println(vertex.vertexId);
               
               for(i=0; i<vertex.edges.size(); i++) {
                    
                    //Get all the vertices reachable from processed vertex and add them to queue.
                    //put the processed vertex to explored set of vertices, so that its not explored again.
                    targetId = vertex.edges.get(i).Target.vertexId;
                    if(!visited[targetId]) {
                         queue.add(vertex.edges.get(i).Target);
                         visited[targetId] = true;
                    }
               }
          }
     }



Adjacency List Implementation

/*
 * Demo Vertex class contains only vertexId.
 * Can be extended to keep other details.
 */
public class Vertex {
     int vertexId;
     ArrayList<Edge> edges ;
     
     public Vertex(int id) {
          vertexId = id;
          edges = new ArrayList<>();
     }
     
     public void addEdge(Edge e) {
          edges.add(e);
     }
}




public class Edge {
     Vertex Target;
     int weight;
     
     public Edge(Vertex Target, int weight) {
          this.Target = Target;
          this.weight = weight;
     }
}



public class Graph {
     /*
      * @vertices, ArrayList of all vertices in a graph.
      * @directed, boolean variable to store if graph is directed/undirected
      * @totalVertices, Total number of vertices in a graph.
      */
     Vertex vertices[];
     boolean directed;
     int totalVertices;
     
     /*
      * @param directed , boolean variable to keep if graph is directed.
      * @param totalVertices, total number of vertices in a graph.
      */
     public Graph(int totalVertices, boolean directed) {
          this.totalVertices = totalVertices;
          this.directed = directed;
          vertices = new Vertex[totalVertices];
          for(int i=0; i<totalVertices; i++)
               vertices[i] = null;
     }
     
     /*
      * @param target @param source, check if source and target vertex already present (!=null).
      * if source/target vertex not present in graph then create it.
      * Else add weighted edge from source to target vertex.
      */
     public void addVertex(int source, int target, int weight) {
          if(vertices[source] == null) {
               vertices[source] = new Vertex(source);
          }
          if(vertices[target] == null) {
               vertices[target] = new Vertex(target);
          }
          
          Vertex sourceVertex = vertices[source];
          Vertex targetVertex = vertices[target];
          sourceVertex.addEdge(new Edge(targetVertex,weight));
          if(!directed) {
               targetVertex.addEdge(new Edge(sourceVertex,weight));
          }
        }
}

Graph Introduction.

GRAPH
=====
A graph is a datastructure consisting a set of vertices |V| and a set of edges |E|.
Vertices usually represent an entity and it might hold additional data pertaining to this entity.
The relationship between 2 different entities is represented by an edge. So if there is some relation between vertex a and vertex b, then there is an edge between a and b.

Example of graphs:
World wide web.
Social networks.
Internet Routers etc.


TYPES OF GRAPH
==============

1. UNDIRECTED GRAPHS
A graph is called as undirected graph if the edges are not directed meaning, there is a 2 way path for an edge i.e. the edge that goes out of the vertex also lead to this vertex from the connected vertex.
For eg. Social Network graph, if a is freind of b then it is tacit that b is also friend of a. Hence for representing this relation, we use an undirected graph.
Edge between a and b, leads to b from a and also to a from b.

2. DIRECTED GRAPH
A graph is called directed graph/digraph if the edges have directions meaning, the edge that goes out of a vertex doesnot lead to this vertex from connected vertex.
For eg. consider road network, It is possible that there is a one way road between city a and city b, hence people can reach b from a but not a from b since the graphs are directed.

3. WEIGHTED GRAPH
A graph is called as weighted graph if some weight is assigned to the edges.
For eg, again condier road network, There are 2 ways to reach city b from city a, but one of the path is shorter than the other than to represent this, we assign weight to the edges connecting city b from city a, the path taking less time is assign less weight and path taking more time might be assing more weight.

GRAPH REPRESENTATION
====================
There are many ways to represent graphs:
1. Adjacency Matrix.
2. Adjacency List.
3. Incidence Matrix.
4. Incidence List.