Rabin-Karp algorithm for Pattern Searching in Matrix
Given matrices txt[][] of dimensions m1 x m2 and pattern pat[][] of dimensions n1 x n2, the task is to check whether a pattern exists in the matrix or not, and if yes then print the top most indices of the pat[][] in txt[][]. It is assumed that m1, m2 ≥ n1, n2
Examples:
Input: txt[][] = {{G, H, I, P} {J, K, L, Q} {R, G, H, I} {S, J, K, L} } pat[][] = {{G, H, I}, {J, K, L} } Output: Pattern found at ( 0, 0 ) Pattern found at ( 2, 1 ) Explanation: Input: txt[][] = { {A, B, C}, {D, E, F}, {G, H, I} } pat[][] = { {E, F}, {H, I} } Output: Pattern found at (1, 1)
Approach: In order to find a pattern in a 2-D array using Rabin-Karp algorithm, consider an input matrix txt[m1][m2] and a pattern pat[n1][n2]. The idea is to find the hash of each columns of mat[][] and pat[][] and compare the hash values. For any column if hash values are equals than check for the corresponding rows values. Below are the steps:
- Find the hash values of each column for the first N1 rows in both txt[][] and pat[][] matrix.
- Apply Rabin-Karp Algorithm by finding hash values for the column hashes found in step 1.
- If a match is found compare txt[][] and pat[][] matrices for the specific rows and columns.
- Else slide down the column hashes by 1 row in the txt matrix using a rolling hash.
- Repeat steps 2 to 4 for all the hash values and if we found any pat[][] match in txt[][] then print the indices of top most cell in the txt[][].
To find the hash value: In order to find the hash value of a substring of size N in a text using rolling hash follow below steps:
- Remove the first character from the string: hash(txt[s:s+n-1])-(radix**(n-1)*txt[s])%prime_number
- Add the next character to the string: hash(txt[s:s+n-1])*radix + txt[n]
Below is the implementation of the above approach:
C++
#include <bits/stdc++.h> using namespace std; long long mod = 257; //The modular value long long r = 256; //radix long long dr = 1; //Highest power for row hashing long long dc = 1; //Highest power for col hashing //func that return a power n under mod m in LogN long long power( int a, int n, long long m){ if (n == 0){ return 1; } if (n == 1){ return a%m; } long long pow = power(a,n/2,m); if (n&1){ return ((a%m)*( pow )%m * ( pow )%m)%m; } else { return (( pow )%m * ( pow )%m)%m; } } //Checks if all values of pattern matches with the text bool check(vector<vector< char >> &txt,vector<vector< char >> &pat, long long r, long long c){ for ( long long i=0;i<pat.size();i++){ for ( long long j=0;j<pat[0].size();j++){ if (pat[i][j] != txt[i+r][j+c]) return false ; } } return true ; } //Finds the first hash of first n rows where n is no. of rows in pattern vector< long long > findHash(vector<vector< char >> &mat, long long row){ vector< long long > hash; long long col = mat[0].size(); for ( long long i=0;i<col;i++){ long long h = 0; for ( long long j=0;j<row;j++){ h = ((h*r)%mod + mat[j][i]%mod)%mod; } hash.push_back(h); } return hash; } //rolling hash function for columns void colRollingHash(vector<vector< char >> &txt, vector< long long > &t_hash, long long row, long long p_row){ for ( long long i=0;i<t_hash.size();i++){ t_hash[i] = (t_hash[i]%mod - ((txt[row][i])%mod*(dr)%mod)%mod)%mod; t_hash[i] = ((t_hash[i]%mod) * (r%mod))%mod; t_hash[i] = (t_hash[i]%mod + txt[row+p_row][i]%mod)%mod; } } void rabinKarp(vector<vector< char >> &txt, vector<vector< char >> &pat){ long long t_row = txt.size(); long long t_col = txt[0].size(); long long p_row = pat.size(); long long p_col = pat[0].size(); dr = power(r,p_row-1,mod); dc = power(r,p_col-1,mod); vector< long long > t_hash = findHash(txt,p_row); //column hash of p_row rows vector< long long > p_hash = findHash(pat,p_row); //column hash of p_row rows long long p_val = 0; //hash of entire pattern matrix for ( long long i=0;i<p_col;i++){ p_val = (p_val*r + p_hash[i])%mod; } for ( long long i=0;i<=(t_row-p_row);i++){ long long t_val = 0; for ( long long i=0;i<p_col;i++){ t_val = ((t_val*r) + t_hash[i])%mod; } for ( long long j=0;j<=(t_col-p_col);j++){ if (p_val == t_val){ if (check(txt,pat,i,j)){ cout<<i<< " " <<j<<endl; } } //calculating t_val for next set of columns t_val = (t_val%mod - ((t_hash[j]%mod)*(dc%mod))%mod + mod)%mod; t_val = (t_val%mod * r%mod)%mod; t_val = (t_val%mod + t_hash[j+p_col]%mod)%mod; } if (i < t_row-p_row){ //call this function for hashing form next row colRollingHash(txt,t_hash,i,p_row); } } } int main(){ vector<vector< char >> txt{{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' }}; vector<vector< char >> pat{{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' },{ 'A' , 'B' , 'C' , 'D' , 'E' }}; //function prints the indices of row and col where its a match in txt rabinKarp(txt,pat); return 0; } |
Python3
# Python implementation for the # pattern matching in 2-D matrix # Function to find the hash-value # of the given columns of text def findHash(arr, col, row): hashCol = [] add = 0 radix = 256 # For each column for i in range ( 0 , col): for j in reversed ( range ( 0 , row)): add = add + (radix * * (row - j - 1 ) * ord (arr[j][i])) % 101 hashCol.append(add % 101 ); add = 0 return hashCol # Function to check equality of the # two strings def checkEquality(txt, row, col, flag): txt = [txt[i][col:patCol + col] for i in range (row, patRow + row)] # If pattern found if txt = = pat: flag = 1 print ( "Pattern found at" , \ "(" , row, ", " , col, ")" ) return flag # Function to find the hash value of # of the next column using rolling-hash # of the Rabin-karp def colRollingHash(txtHash, nxtRow): radix = 256 # Find the hash of the matrix for j in range ( len (txtHash)): txtHash[j] = (txtHash[j] * radix \ + ord (txt[nxtRow][j])) % 101 txtHash[j] = txtHash[j] - (radix * * (patRow) * ord (txt[nxtRow - patRow][j])) % 101 txtHash[j] = txtHash[j] % 101 return txtHash # Function to match a pattern in # the given 2D Matrix def search(txt, pat): # List of the hashed value for # the text and pattern columns patHash = [] txtHash = [] # Hash value of the # pat_hash and txt_hash patVal = 0 txtVal = 0 # Radix value for the input characters radix = 256 # Variable to determine if # pattern was found or not flag = 0 # Function call to find the # hash value of columns txtHash = findHash(txt, txtCol, patRow) patHash = findHash(pat, patCol, patRow) # Calculate hash value for patHash for i in range ( len (patHash)): patVal = patVal \ + (radix * * ( len (patHash) - i - 1 ) * patHash[i] % 101 ) patVal = patVal % 101 # Applying Rabin-Karp to compare # txtHash and patHash for i in range (patRow - 1 , txtRow): col = 0 txtVal = 0 # Find the hash value txtHash for j in range ( len (patHash)): txtVal = txtVal\ + (radix * * ( len (patHash) - j - 1 ) * txtHash[j]) % 101 txtVal = txtVal % 101 if txtVal = = patVal: flag = checkEquality(\ txt, i + 1 - patRow, col, flag) else : # Roll the txt window by one character for k in range ( len (patHash), len (txtHash)): txtVal = txtVal \ * radix + (txtHash[k]) % 101 txtVal = txtVal \ - (radix * * ( len (patHash)) * (txtHash[k - len (patHash)])) % 101 txtVal = txtVal % 101 col = col + 1 # Check if txtVal and patVal are equal if patVal = = txtVal: flag = checkEquality(\ txt, i + 1 - patRow, col, flag) else : continue # To make sure i does not exceed txtRow if i + 1 <txtRow: txtHash = colRollingHash(txtHash, i + 1 ) if flag = = 0 : print ( "Pattern not found" ) # Driver Code if __name__ = = "__main__" : # Given Text txt = [[ 'A' , 'B' , 'C' ], \ [ 'D' , 'E' , 'F' ], \ [ 'G' , 'H' , 'I' ]] # Given Pattern pat = [[ 'E' , 'F' ], [ 'H' , 'I' ]] # Dimensions of the text txtRow = 3 txtCol = 3 # Dimensions for the pattern patRow = 2 patCol = 2 # Function Call search(txt, pat) |
Pattern found at ( 1 , 1 )