Array     Breadth-First Search     C++     Depth-First Search     Hard     String     Union Find    

Problem Statement:

Two strings X and Y are similar if we can swap two letters (in different positions) of X, so that it equals Y. Also two strings X and Y are similar if they are equal.

For example, "tars" and "rats" are similar (swapping at positions 0 and 2), and "rats" and "arts" are similar, but "star" is not similar to "tars", "rats", or "arts".

Together, these form two connected groups by similarity: {"tars", "rats", "arts"} and {"star"}.  Notice that "tars" and "arts" are in the same group even though they are not similar.  Formally, each group is such that a word is in the group if and only if it is similar to at least one other word in the group.

We are given a list strs of strings where every string in strs is an anagram of every other string in strs. How many groups are there?

 

Example 1:

Input: strs = ["tars","rats","arts","star"]
Output: 2

Example 2:

Input: strs = ["omv","ovm"]
Output: 1

 

Constraints:

  • 1 <= strs.length <= 300
  • 1 <= strs[i].length <= 300
  • strs[i] consists of lowercase letters only.
  • All words in strs have the same length and are anagrams of each other.

Solution:

Notion of similarity: Two strings are similar if they differ at exactly 0 or 2 positions. This is because we are given that all strings are anagrams of each other, so 0 swaps means differ at 0 positions and 1 swap means differ at exactly 2 positions.

We can now create a graph where each string in strs is a node in the graph and two nodes are connected if they are similar strings as defined above.

DFS solution: We run DFS for each unvisited node. Number of times we have to start DFS is the answer.

 
class Solution {
public:
    void dfs(vector<vector<int>>&G, vector<bool>&visited, int cur)
    {
        visited[cur] = true;
        for (int nbd: G[cur]) if (!visited[nbd]) dfs(G, visited, nbd);
    }
    bool isSimilar(string s1, string s2)
    {
        int diff=0;
        for (int i=0; i<s1.length(); i++) if (s1[i]!=s2[i]) if(++diff>2) return false;
        return true;
    }
    int numSimilarGroups(vector<string>& strs) 
    {
        int n = strs.size(), res=0;
        vector<vector<int>> G(n);
        for(int i=0; i<n; i++) for(int j=i+1; j<n; j++) if(isSimilar(strs[i],strs[j]))
        {
            G[i].push_back(j);
            G[j].push_back(i);
        }
        vector<bool>visited(n,false);
        for (int i=0; i<n; i++) if(!visited[i]) 
        {
            dfs(G,visited,i);
            res++;
        }
        return res;
    }
};
 

BFS solution: Same thing can be done using a queue in a BFS.

 
class Solution {
public:
    void bfs(vector<vector<int>>&G, vector<bool>&visited, int cur)
    {
        queue<int>Q;
        Q.push(cur);
        visited[cur] = true;
        while (!Q.empty())
        {
            int node = Q.front();
            Q.pop();
            for (int nbd: G[node]) if(!visited[nbd])
            {
                Q.push(nbd);
                visited[nbd] = true;
            }
        }
    }
    bool isSimilar(string s1, string s2)
    {
        int diff=0;
        for (int i=0; i<s1.length(); i++) if (s1[i]!=s2[i]) if(++diff>2) return false;
        return true;
    }
    int numSimilarGroups(vector<string>& strs) 
    {
        int n = strs.size(), res=0;
        vector<vector<int>> G(n);
        for(int i=0; i<n; i++) for(int j=i+1; j<n; j++) if(isSimilar(strs[i],strs[j]))
        {
            G[i].push_back(j);
            G[j].push_back(i);
        }
        vector<bool>visited(n,false);
        for (int i=0; i<n; i++) if(!visited[i]) 
        {
            bfs(G,visited,i);
            res++;
        }
        return res;
    }
};
 

Union-Find solution or Disjoint-Set-Union (DSU) solution: We can also create a Union-Find data structure where we create union for each pair of similar words. Read more on Union-Find in this LC explore card. Here we will use an optimized implementation of Union-Find with path compression and union by rank. Finally the number of unique root nodes in the UF data structure is the answer.

 
class UnionFind {
public:
    UnionFind(int sz) : root(sz), ranks(sz) {
        for (int i = 0; i < sz; i++) {
            root[i] = i;
            ranks[i] = 1;
        }
    }
    int find(int x) {
        if (x==root[x]) return x;
        return root[x] = find(root[x]);
    }
    void unionSet(int x, int y) {
        int rootX = find(x);
        int rootY = find(y);
        if (rootX == rootY) return;
        if(ranks[rootX]==ranks[rootY]) ranks[rootX]++;
        else if (ranks[rootX]<ranks[rootY]) swap(rootY,rootX);
        root[rootY] = rootX;
    }
private:
    vector<int> root;
    vector<int> ranks;
};

class Solution {
public:
    bool isSimilar(string s1, string s2)
    {
        int diff=0;
        for (int i=0; i<s1.length(); i++) if (s1[i]!=s2[i]) if(++diff>2) return false;
        return true;
    }
    int numSimilarGroups(vector<string>& strs) 
    {
        int n = strs.size();
        UnionFind uf = UnionFind(n);
        for(int i=0; i<n; i++) for(int j=i+1; j<n; j++) 
        if(isSimilar(strs[i],strs[j])) uf.unionSet(i, j);
        unordered_set<int> myset;
        for(int i=0; i<n; i++) myset.insert(uf.find(i));
        return myset.size();
    }
};
 

All three solutions have TC of $O(n^2 \vert s \vert)$ and SC of DFS and BFS solutions is $O(n^2)$ (to store the graph) whereas SC of Union-Find solution is $O(n)$.