Substring with Concatenation of All Words

gouthampradhan · gouthampradhan · commit c3992f30769e · 2019-03-02T13:23:00.000+01:00
diff --git a/README.md b/README.md
@@ -206,6 +206,7 @@ My accepted leetcode solutions to some of the common interview problems.
 - [Partition Labels](problems/src/hashing/PartitionLabels.java) (Medium)
 - [Custom Sort String](problems/src/hashing/CustomSortString.java) (Medium)
 - [Short Encoding of Words](problems/src/hashing/ShortEncodingOfWords.java) (Medium)
+- [Substring with Concatenation of All Words](problems/src/hashing/SubstringConcatenationOfWords.java) (Hard)
 
 #### [Heap](problems/src/heap)
 
diff --git a/problems/src/hashing/SubstringConcatenationOfWords.java b/problems/src/hashing/SubstringConcatenationOfWords.java
@@ -0,0 +1,79 @@
+package hashing;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by gouthamvidyapradhan on 02/03/2019
+ * You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices
+ * of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.
+ *
+ * Example 1:
+ *
+ * Input:
+ *   s = "barfoothefoobarman",
+ *   words = ["foo","bar"]
+ * Output: [0,9]
+ * Explanation: Substrings starting at index 0 and 9 are "barfoor" and "foobar" respectively.
+ * The output order does not matter, returning [9,0] is fine too.
+ * Example 2:
+ *
+ * Input:
+ *   s = "wordgoodgoodgoodbestword",
+ *   words = ["word","good","best","word"]
+ * Output: []
+ *
+ * Solution:
+ * General idea is to do the following
+ * 1. Calculate the word count for the given array of words and store this in a HashMap.
+ * 2. For every substring (substring of s) of length (words[0].length() * words.length) split this into words of
+ * length words[0].length and calculate the word frequency for the split words. If the word frequency matches
+ * the word frequency of the given original word list then add the starting index of this substring into the result
+ * array.
+ *
+ * A small optimization is to break the substring match as soon as you find out that the word formed from the substring
+ * is not part of the original given word list or if the frequency of the word exceeds the frequency of the original
+ * word count.
+ */
+public class SubstringConcatenationOfWords {
+
+    /**
+     * Main method
+     * @param args
+     */
+    public static void main(String[] args) {
+        String[] words = {"word","good","best","word"};
+        System.out.println(new SubstringConcatenationOfWords().findSubstring("wordgoodgoodgoodbestword", words));
+    }
+
+    public List<Integer> findSubstring(String s, String[] words) {
+        if(words.length == 0) return new ArrayList<>();
+        int wLen = words[0].length();
+        int sLen = wLen * words.length;
+        List<Integer> result = new ArrayList<>();
+        if(sLen > s.length()) return result;
+        Map<String, Integer> countMap = new HashMap<>();
+        for(String w : words){
+            countMap.putIfAbsent(w, 0);
+            countMap.put(w, countMap.get(w) + 1);
+        }
+        for(int k = 0; (s.length() - k) >= sLen; k++) {
+            Map<String, Integer> subSMap = new HashMap<>();
+            int i = k;
+            for(int j = i + wLen; (i - k) < sLen; i = j, j += wLen){
+                String subS = s.substring(i, j);
+                subSMap.putIfAbsent(subS, 0);
+                subSMap.put(subS, subSMap.get(subS) + 1);
+                if(!countMap.containsKey(subS) || subSMap.get(subS) > countMap.get(subS)){
+                    break;
+                }
+            }
+            if((i - k) >= sLen){
+                result.add(k);
+            }
+        }
+        return result;
+    }
+}