Sort an array of strings based on the frequency of good words in them
Given a set of product reviews (R) by different customers and a string S containing good words separated by a _, the task is to sort the reviews in decreasing order of their goodness value. Goodness Value is defined by the number of good words present in that review.
Examples:
Input: S = “geeks_for_geeks_is_great”, R = {“geeks_are_geeks”, “geeks_dont_lose”, “geeks_for_geeks_is_love”}
Output: geeks for geeks is love geeks are geeks geeks dont lose
Input: S = “cool_wifi_ice”, R = {“water_is_cool”, “cold_ice_drink”, “cool_wifi_speed”}
Output: cool wifi speed water is cool cold ice drink
Naive approach: Insert all the good words in an unordered_set, then iterate through each word of each review and keep a count of the good words by checking whether the word is present in that set. We then use a stable sorting algorithm to sort the array R according to the count of good words in each review. It’s clear that the time complexity of this method is greater than O(N * NlogN).
Efficient approach: Make a Trie of all the good words and check the goodness of each word in a review using the trie.
- Insert all the good words in a trie.
- For each review, calculate the number of good words in it by checking whether a given word exists in the trie or not.
Below is the implementation of the above approach:
CPP14
// C++ implementation of the approach
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// Number of children per trie node: one per letter 'a'..'z'
constexpr int MAX = 26;

// Comparator for (good-word count, original index) pairs:
// a higher count sorts first, ties are broken by the lower index
bool cmp(const pair<int, int>& a, const pair<int, int>& b)
{
    if (a.first == b.first)
        return a.second < b.second;
    return a.first > b.first;
}

// Structure of a Trie node
struct node {
    // True when a stored word ends at this node
    bool exist;

    // Children, indexed by (letter - 'a'); each child is owned by its
    // parent so the whole trie is released when the root is destroyed
    unique_ptr<node> arr[MAX];

    node(bool bul = false)
        : exist(bul)
    {
    }
};

// Returns true if s is non-empty and consists only of 'a'..'z',
// i.e. every character maps to a valid child index
static bool isLowercaseWord(const string& s)
{
    if (s.empty())
        return false;
    for (char c : s)
        if (c < 'a' || c > 'z')
            return false;
    return true;
}

// Function to add a string to the trie.
// Words that are empty or contain characters outside 'a'..'z'
// cannot be indexed and are ignored.
void add(const string& s, node* trie)
{
    // Validate first so a rejected word leaves no partial path behind
    if (!isLowercaseWord(s))
        return;

    for (char c : s) {
        unique_ptr<node>& child = trie->arr[c - 'a'];

        // If trie doesn't already contain
        // the current node then create one
        if (!child)
            child = make_unique<node>();
        trie = child.get();
    }
    trie->exist = true;
}

// Function that returns true if
// the trie contains the string s
bool search(const string& s, const node* trie)
{
    for (char c : s) {
        // A character outside 'a'..'z' can never be in the trie
        if (c < 'a' || c > 'z')
            return false;
        trie = trie->arr[c - 'a'].get();
        if (trie == nullptr)
            return false;
    }
    return trie->exist;
}

// Function to replace every '_' with a
// white space in the given string
void convert(string& str)
{
    replace(str.begin(), str.end(), '_', ' ');
}

// Function to sort the array based on good words.
// good   : '_' separated list of good words
// review : reviews with '_' separated words; every element is rewritten
//          in place with spaces instead of '_'
// Prints the reviews, one per line, by decreasing number of good words;
// reviews with equal counts keep their original relative order.
void sortArr(string good, vector<string>& review)
{
    // Extract all the good words which
    // are '_' separated
    convert(good);

    // The root lives on the stack; its descendants are freed with it
    node trie;
    string word;

    // Building the entire trie by stringstreaming
    // the 'good words' string
    istringstream goodStream(good);
    while (goodStream >> word)
        add(word, &trie);

    const int n = static_cast<int>(review.size());

    // To store the number of good words
    // and the string index pairs
    vector<pair<int, int> > rating;
    rating.reserve(n);

    for (int i = 0; i < n; i++) {
        convert(review[i]);

        // A fresh stream per review: nothing from earlier reviews is kept
        istringstream reviewStream(review[i]);
        int k = 0;
        while (reviewStream >> word) {

            // If this word is present in the trie
            // then increment its count
            if (search(word, &trie))
                k++;
        }

        // Store the number of good words in the
        // current string paired with its
        // index in the original array
        rating.emplace_back(k, i);
    }

    // Using comparator function to
    // sort the array as required
    sort(rating.begin(), rating.end(), cmp);

    // Print the sorted array
    for (const pair<int, int>& r : rating)
        cout << review[r.second] << "\n";
}

// Driver code
int main()
{
    // String containing good words
    string S = "geeks_for_geeks_is_great";

    // Vector of strings to be sorted
    vector<string> R = { "geeks_are_geeks", "geeks_dont_lose",
                         "geeks_for_geeks_is_love" };

    // Sort the array based on the given conditions
    sortArr(S, R);
}
Java
import java.util.*;

class Program {

    // Comparator for (good-word count, original index) entries:
    // a higher count sorts first, ties are broken by the lower index
    static class PairComparer
        implements Comparator<Map.Entry<Integer, Integer>> {
        public int compare(Map.Entry<Integer, Integer> a,
                           Map.Entry<Integer, Integer> b)
        {
            // Compare the number of good words
            int byCount = Integer.compare(b.getKey(), a.getKey());
            if (byCount != 0) {
                return byCount;
            }
            return Integer.compare(a.getValue(), b.getValue());
        }
    }

    // Structure of a Trie node
    static class TrieNode {
        // True when a stored word ends at this node
        public boolean exist;

        // Children, indexed by (letter - 'a')
        public TrieNode[] arr;

        public TrieNode() { this(false); }

        public TrieNode(boolean bul)
        {
            exist = bul;
            arr = new TrieNode[26];
        }
    }

    // Returns true if s is non-empty and made only of 'a'..'z',
    // i.e. every character maps to a valid child index
    private static boolean isLowercaseWord(String s)
    {
        if (s.isEmpty()) {
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c < 'a' || c > 'z') {
                return false;
            }
        }
        return true;
    }

    // Function to add a string to the trie.
    // Empty strings (produced by split on consecutive separators) and
    // words with characters outside 'a'..'z' are ignored.
    static void add(String s, TrieNode trie)
    {
        // Validate first so a rejected word leaves no partial path behind
        if (!isLowercaseWord(s)) {
            return;
        }
        for (int i = 0; i < s.length(); i++) {
            int index = s.charAt(i) - 'a';

            // If trie doesn't already contain
            // the current node then create one
            if (trie.arr[index] == null) {
                trie.arr[index] = new TrieNode();
            }
            trie = trie.arr[index];
        }
        trie.exist = true;
    }

    // Function that returns true if
    // the trie contains the string s
    static boolean search(String s, TrieNode trie)
    {
        // Such a word can never have been stored
        if (!isLowercaseWord(s)) {
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            trie = trie.arr[s.charAt(i) - 'a'];
            if (trie == null) {
                return false;
            }
        }
        return trie.exist;
    }

    // Function to replace every '_' with a
    // white space in the given string
    static void convert(StringBuilder str)
    {
        for (int i = 0; i < str.length(); i++) {
            if (str.charAt(i) == '_') {
                str.setCharAt(i, ' ');
            }
        }
    }

    // Returns a copy of s with every '_' replaced by a space
    private static String withSpaces(String s)
    {
        StringBuilder builder = new StringBuilder(s);
        convert(builder);
        return builder.toString();
    }

    // Function to sort the array based on good words.
    // good   : '_' separated list of good words
    // review : reviews with '_' separated words (not modified)
    // Prints the reviews, with '_' shown as spaces, one per line by
    // decreasing number of good words; ties keep their original order.
    static void sortArr(String good, List<String> review)
    {
        TrieNode trie = new TrieNode();

        // Building the entire trie by iterating
        // over the good words string array
        for (String word : withSpaces(good).split(" ")) {
            add(word, trie);
        }

        int n = review.size();

        // Space-separated form of each review, used for printing so the
        // caller's list (which may be immutable) is left untouched
        List<String> converted = new ArrayList<>(n);

        // To store the number of good words
        // and the string index pairs
        List<Map.Entry<Integer, Integer>> rating = new ArrayList<>(n);

        for (int i = 0; i < n; i++) {
            String text = withSpaces(review.get(i));
            converted.add(text);

            int k = 0;
            for (String word : text.split(" ")) {
                // If this word is present in the trie
                // then increment its count
                if (search(word, trie)) {
                    k++;
                }
            }

            // Store the number of good words in the
            // current string paired with its
            // index in the original list
            rating.add(Map.entry(k, i));
        }

        // Using PairComparer to sort the list as required
        rating.sort(new PairComparer());

        // Print the sorted array
        for (Map.Entry<Integer, Integer> entry : rating) {
            System.out.println(converted.get(entry.getValue()));
        }
    }

    public static void main(String[] args)
    {
        String good = "geeks_for_geeks_is_great";
        List<String> review = Arrays.asList(
            "geeks_are_geeks", "geeks_dont_lose",
            "geeks_for_geeks_is_love");
        sortArr(good, review);
    }
}

// This code is contributed by shiv1o43g
Python3
# Python implementation of the approach

# Number of children per trie node: one per letter 'a'..'z'
MAX = 26


# Comparator function for sorting
def cmp(a, b):
    """Return True when pair a = (count, index) should precede pair b.

    A higher count comes first; equal counts are ordered by lower index.
    sortArr does not call this; it sorts with an equivalent key function.
    """
    # Compare the number of good words
    if a[0] == b[0]:
        return a[1] < b[1]
    return a[0] > b[0]


# Structure of a Trie node
class Node:
    def __init__(self, bul=False):
        # True when a stored word ends at this node
        self.exist = bul
        # Children, indexed by ord(letter) - ord('a')
        self.arr = [None] * MAX


def _child_index(ch):
    """Return the child slot for ch, or -1 if ch is not in 'a'..'z'."""
    idx = ord(ch) - ord('a')
    # Reject out-of-range values explicitly: a negative index would
    # otherwise wrap around and alias another letter's slot
    return idx if 0 <= idx < MAX else -1


# Function to add a string to the trie
def add(s, trie):
    """Insert word s below the node trie.

    Empty words and words containing characters outside 'a'..'z' are
    ignored, since they cannot be indexed.
    """
    # Validate first so a rejected word leaves no partial path behind
    if not s or any(_child_index(ch) < 0 for ch in s):
        return

    for ch in s:
        idx = _child_index(ch)

        # If trie doesn't already contain
        # the current node then create one
        if trie.arr[idx] is None:
            trie.arr[idx] = Node()
        trie = trie.arr[idx]
    trie.exist = True


# Function that returns true if
# the trie contains the string s
def search(s, trie):
    """Return True if word s was stored below the node trie."""
    for ch in s:
        idx = _child_index(ch)
        # A character outside 'a'..'z' can never be in the trie
        if idx < 0 or trie.arr[idx] is None:
            return False
        trie = trie.arr[idx]
    return trie.exist


# Function to replace every '_' with a
# white space in the given string
def convert(s):
    """Return s with every '_' replaced by a space."""
    return s.replace('_', ' ')


# Function to sort the array based on good words
def sortArr(good, review):
    """Print the reviews by decreasing number of good words.

    good   -- '_' separated string of good words
    review -- list of '_' separated reviews; each element is replaced
              in place by its space-separated form
    Reviews with equal counts are printed in their original order.
    """
    # Extract all the good words which
    # are '_' separated and build the trie from them
    trie = Node()
    for word in convert(good).split():
        add(word, trie)

    # To store the number of good words
    # and the string index pairs
    rating = []
    for i in range(len(review)):
        review[i] = convert(review[i])

        # Count the words of this review that are present in the trie
        k = 0
        for word in review[i].split():
            if search(word, trie):
                k += 1

        # Store the number of good words in the
        # current string paired with its
        # index in the original array
        rating.append((k, i))

    # Sort by count descending, then by original index ascending
    rating.sort(key=lambda x: (-x[0], x[1]))

    # Print the sorted array
    for r in rating:
        print(review[r[1]])


# Driver code
if __name__ == '__main__':

    # String containing good words
    S = 'geeks_for_geeks_is_great'

    # List of strings to be sorted
    R = ['geeks_are_geeks', 'geeks_dont_lose', 'geeks_for_geeks_is_love']

    # Sort the array based on the given conditions
    sortArr(S, R)
C#
using System;
using System.Collections.Generic;
using System.Linq;

class Program
{
    // Comparator for (good-word count, original index) pairs:
    // a higher count sorts first, ties are broken by the lower index
    class PairComparer : IComparer<KeyValuePair<int, int>>
    {
        public int Compare(KeyValuePair<int, int> a,
                           KeyValuePair<int, int> b)
        {
            // Compare the number of good words
            int byCount = b.Key.CompareTo(a.Key);
            if (byCount != 0)
                return byCount;
            return a.Value.CompareTo(b.Value);
        }
    }

    // Structure of a Trie node
    internal class TrieNode
    {
        // True when a stored word ends at this node
        public bool Exist;

        // Children, indexed by (letter - 'a')
        public TrieNode[] Arr;

        public TrieNode(bool bul = false)
        {
            Exist = bul;
            Arr = new TrieNode[26];
        }
    }

    // Returns true if s is non-empty and made only of 'a'..'z',
    // i.e. every character maps to a valid child index
    static bool IsLowercaseWord(string s)
    {
        if (s.Length == 0)
            return false;
        foreach (char c in s)
            if (c < 'a' || c > 'z')
                return false;
        return true;
    }

    // Function to add a string to the trie.
    // Empty strings and words with characters outside 'a'..'z'
    // cannot be indexed and are ignored.
    internal static void Add(string s, TrieNode trie)
    {
        // Validate first so a rejected word leaves no partial path behind
        if (!IsLowercaseWord(s))
            return;

        foreach (char c in s)
        {
            // If trie doesn't already contain
            // the current node then create one
            if (trie.Arr[c - 'a'] == null)
                trie.Arr[c - 'a'] = new TrieNode();
            trie = trie.Arr[c - 'a'];
        }
        trie.Exist = true;
    }

    // Function that returns true if
    // the trie contains the string s
    internal static bool Search(string s, TrieNode trie)
    {
        // Such a word can never have been stored
        if (!IsLowercaseWord(s))
            return false;

        foreach (char c in s)
        {
            trie = trie.Arr[c - 'a'];
            if (trie == null)
                return false;
        }
        return trie.Exist;
    }

    // Function to replace every '_' with a
    // white space in the given string
    internal static void Convert(ref string str)
    {
        str = str.Replace('_', ' ');
    }

    // Function to sort the array based on good words.
    // good   : '_' separated list of good words
    // review : reviews with '_' separated words (not modified)
    // Prints the reviews, with '_' shown as spaces, one per line by
    // decreasing number of good words; equal counts are ordered by
    // original index.
    internal static void SortArr(string good, List<string> review)
    {
        char[] separators = { ' ' };

        // Extract all the good words which
        // are '_' separated
        Convert(ref good);
        TrieNode trie = new TrieNode();

        // Building the entire trie by iterating
        // over the good words string array
        foreach (string word in good.Split(
                     separators, StringSplitOptions.RemoveEmptyEntries))
            Add(word, trie);

        int n = review.Count;

        // Space-separated form of each review, used for printing
        List<string> converted = new List<string>(n);

        // To store the number of good words
        // and the string index pairs
        List<KeyValuePair<int, int>> rating
            = new List<KeyValuePair<int, int>>(n);

        for (int i = 0; i < n; i++)
        {
            string reviewStr = review[i];
            Convert(ref reviewStr);
            converted.Add(reviewStr);

            int k = 0;
            foreach (string word in reviewStr.Split(
                         separators, StringSplitOptions.RemoveEmptyEntries))
            {
                // If this word is present in the trie
                // then increment its count
                if (Search(word, trie))
                    k++;
            }

            // Store the number of good words in the
            // current string paired with its
            // index in the original list
            rating.Add(new KeyValuePair<int, int>(k, i));
        }

        // Using PairComparer to sort the list as required.
        // List.Sort is not stable, but the comparer orders ties by index.
        rating.Sort(new PairComparer());

        // Print the sorted array
        foreach (KeyValuePair<int, int> entry in rating)
            Console.WriteLine(converted[entry.Value]);
    }

    static void Main(string[] args)
    {
        string good = "geeks_for_geeks_is_great";
        List<string> review = new List<string>() {
            "geeks_are_geeks", "geeks_dont_lose",
            "geeks_for_geeks_is_love"
        };
        SortArr(good, review);
    }
}
Javascript
// JavaScript code for the approach

// Number of children per trie node: one per letter 'a'..'z'
const MAX = 26;

// Character code of 'a', the base for child indices
const CODE_A = 'a'.charCodeAt(0);

// Comparator function for sorting [count, index] pairs:
// a higher count sorts first, ties are broken by the lower index
function cmp(a, b) {
  // Compare the number of good words
  if (a[0] == b[0]) {
    return a[1] - b[1];
  }
  return b[0] - a[0];
}

// Structure of a Trie node
class Node {
  constructor(bul = false) {
    // True when a stored word ends at this node
    this.exist = bul;
    // Children, indexed by (char code - code of 'a')
    this.arr = new Array(MAX).fill(null);
  }
}

// Returns the child slot for the character at position i of s,
// or -1 when that character is not in 'a'..'z'
function childIndex(s, i) {
  const idx = s.charCodeAt(i) - CODE_A;
  return idx >= 0 && idx < MAX ? idx : -1;
}

// Returns true if s is non-empty and made only of 'a'..'z'
function isLowercaseWord(s) {
  if (s.length === 0) {
    return false;
  }
  for (let i = 0; i < s.length; i++) {
    if (childIndex(s, i) < 0) {
      return false;
    }
  }
  return true;
}

// Function to add a string to the trie.
// Empty strings (produced by split on consecutive separators) and
// words with characters outside 'a'..'z' are ignored.
function add(s, trie) {
  // Validate first so a rejected word leaves no partial path behind
  if (!isLowercaseWord(s)) {
    return;
  }
  for (let i = 0; i < s.length; i++) {
    const idx = childIndex(s, i);

    // If trie doesn't already contain
    // the current node then create one
    if (trie.arr[idx] === null) {
      trie.arr[idx] = new Node();
    }
    trie = trie.arr[idx];
  }
  trie.exist = true;
}

// Function that returns true if
// the trie contains the string s
function search(s, trie) {
  // Such a word can never have been stored
  if (!isLowercaseWord(s)) {
    return false;
  }
  for (let i = 0; i < s.length; i++) {
    trie = trie.arr[childIndex(s, i)];
    if (trie === null) {
      return false;
    }
  }
  return trie.exist;
}

// Function to replace every '_' with a
// white space in the given string
function convert(s) {
  return s.replace(/_/g, ' ');
}

// Function to sort the array based on good words.
// good   : '_' separated string of good words
// review : array of '_' separated reviews; each element is replaced
//          in place by its space-separated form
// Prints the reviews, one per line, by decreasing number of good words;
// equal counts are ordered by original index.
function sortArr(good, review) {
  // Extract all the good words which
  // are '_' separated
  const trie = new Node();

  // Building the entire trie from the good words
  for (const word of convert(good).split(' ')) {
    add(word, trie);
  }

  // To store the number of good words
  // and the string index pairs
  const rating = [];
  for (let i = 0; i < review.length; i++) {
    review[i] = convert(review[i]);

    let k = 0;
    for (const word of review[i].split(' ')) {
      // If this word is present in the trie
      // then increment its count
      if (search(word, trie)) {
        k += 1;
      }
    }

    // Store the number of good words in the
    // current string paired with its
    // index in the original array
    rating.push([k, i]);
  }

  // Using comparator function to
  // sort the array as required
  rating.sort(cmp);

  // Print the sorted array
  for (const entry of rating) {
    console.log(review[entry[1]]);
  }
}

// Driver code
// Input
let S = 'geeks_for_geeks_is_great';
let R = ['geeks_are_geeks', 'geeks_dont_lose', 'geeks_for_geeks_is_love'];

// Function Call
sortArr(S, R);
geeks for geeks is love geeks are geeks geeks dont lose
Time Complexity: O(N * M * K) where N is the number of reviews, M is the longest length of any review and K is the number of good words.
Auxiliary Space: O(K * 26^L) where K is the number of good words and L is the maximum length of any good word.
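Approach (Using Hash Table): A hash table can be used to store the good words of the input string, so that the good words in each review can be counted and the vector of strings sorted by that count. Note that the algorithm steps and the code listed below nevertheless perform the word lookup with a trie rather than a hash table.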
Algorithm Steps:
- Define a function cmp as the comparator function for sorting based on the number of good words in a string and its index in the original array.
- Define a struct node for the Trie data structure and its functions.
- Define the add and search functions to add and search for a string in the Trie data structure.
- Define the convert function to replace every ‘_’ with a space in a given string.
- Define the sortArr function to sort the array based on the given conditions.
- In sortArr, convert the given good string to a Trie data structure and build it.
- Loop through the given review vector and count the number of good words present in each string using the search function and store the number of good words in the current string paired with its index in the original array.
- Sort the array using the cmp comparator function.
- Print the sorted array.
- In the main function, define a string containing good words S and a vector of strings to be sorted R.
- Call the sortArr function passing the S and R as arguments.
Below is the implementation of the above approach:
C++
// C++ code for the above approach
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// Number of children per trie node: one per letter 'a'..'z'
constexpr int MAX = 26;

// Comparator for (good-word count, original index) pairs:
// a higher count sorts first, ties are broken by the lower index
bool cmp(const pair<int, int>& a, const pair<int, int>& b)
{
    if (a.first == b.first)
        return a.second < b.second;
    return a.first > b.first;
}

// Structure of a Trie node
struct node {
    // True when a stored word ends at this node
    bool exist;

    // Children, indexed by (letter - 'a'); each child is owned by its
    // parent so the whole trie is released when the root is destroyed
    unique_ptr<node> arr[MAX];

    node(bool bul = false)
        : exist(bul)
    {
    }
};

// Returns true if s is non-empty and consists only of 'a'..'z',
// i.e. every character maps to a valid child index
static bool isLowercaseWord(const string& s)
{
    if (s.empty())
        return false;
    for (char c : s)
        if (c < 'a' || c > 'z')
            return false;
    return true;
}

// Function to add a string to the trie.
// Words that are empty or contain characters outside 'a'..'z'
// cannot be indexed and are ignored.
void add(const string& s, node* trie)
{
    // Validate first so a rejected word leaves no partial path behind
    if (!isLowercaseWord(s))
        return;

    for (char c : s) {
        unique_ptr<node>& child = trie->arr[c - 'a'];

        // If trie doesn't already contain
        // the current node then create one
        if (!child)
            child = make_unique<node>();
        trie = child.get();
    }
    trie->exist = true;
}

// Function that returns true if
// the trie contains the string s
bool search(const string& s, const node* trie)
{
    for (char c : s) {
        // A character outside 'a'..'z' can never be in the trie
        if (c < 'a' || c > 'z')
            return false;
        trie = trie->arr[c - 'a'].get();
        if (trie == nullptr)
            return false;
    }
    return trie->exist;
}

// Function to replace every '_' with a
// white space in the given string
void convert(string& str)
{
    replace(str.begin(), str.end(), '_', ' ');
}

// Function to sort the array based on good words.
// good   : '_' separated list of good words
// review : reviews with '_' separated words; every element is rewritten
//          in place with spaces instead of '_'
// Prints the reviews, one per line, by decreasing number of good words;
// reviews with equal counts keep their original relative order.
void sortArr(string good, vector<string>& review)
{
    // Extract all the good words which
    // are '_' separated
    convert(good);

    // The root lives on the stack; its descendants are freed with it
    node trie;
    string word;

    // Building the entire trie by stringstreaming
    // the 'good words' string
    istringstream goodStream(good);
    while (goodStream >> word)
        add(word, &trie);

    const int n = static_cast<int>(review.size());

    // To store the number of good words
    // and the string index pairs
    vector<pair<int, int> > rating;
    rating.reserve(n);

    for (int i = 0; i < n; i++) {
        convert(review[i]);

        // A fresh stream per review: nothing from earlier reviews is kept
        istringstream reviewStream(review[i]);
        int k = 0;
        while (reviewStream >> word) {

            // If this word is present in the trie
            // then increment its count
            if (search(word, &trie))
                k++;
        }

        // Store the number of good words in the
        // current string paired with its
        // index in the original array
        rating.emplace_back(k, i);
    }

    // Using comparator function to
    // sort the array as required
    sort(rating.begin(), rating.end(), cmp);

    // Print the sorted array
    for (const pair<int, int>& r : rating)
        cout << review[r.second] << "\n";
}

// Driver code
int main()
{
    // String containing good words
    string S = "geeks_for_geeks_is_great";

    // Vector of strings to be sorted
    vector<string> R = { "geeks_are_geeks", "geeks_dont_lose",
                         "geeks_for_geeks_is_love" };

    // Sort the array based on the given conditions
    sortArr(S, R);
}
geeks for geeks is love geeks are geeks geeks dont lose
Time Complexity: O(NMlogM), where N is the number of strings in the vector and M is the maximum length of a string in the vector.
Auxiliary Space: O(M^2 + NM), where M^2 is the space required for the trie and NM is the space required for the vector and other variables.
Please Login to comment...