update 30 Substring with Concatenation of All Words

selfboot · selfboot · commit 402dac4734c5 · 2016-07-12T19:00:20.000+08:00
diff --git a/Combination/30_SubstringWithConcatenationOfAllWords.cpp b/Combination/30_SubstringWithConcatenationOfAllWords.cpp
@@ -0,0 +1,119 @@
+/*
+ * @Author: xuezaigds@gmail.com
+ * @Last Modified time: 2016-07-12 18:58:58
+ */
+
+class Solution {
+public:
+    // Brute force, easy to follow but slow: try every start index and check
+    // the word-sized chunks that follow it against the required word counts.
+    vector<int> findSubstring(string s, vector<string>& words) {
+        const int text_len = s.size();
+        const int num_words = words.size();
+        if(text_len == 0 || num_words == 0){
+            return {};
+        }
+
+        const int word_len = words[0].size();
+        const int window_len = num_words * words[0].size();
+        unordered_map<string, int> required;
+        for(auto &w: words){
+            required[w] += 1;
+        }
+
+        vector<int> starts;
+        for(int begin=0; begin<text_len-window_len+1; begin++){
+            unordered_map<string, int> seen;
+            int matched = 0;
+            for(; matched<num_words; matched++){
+                string chunk = s.substr(begin+matched*word_len, word_len);
+                // Stop on an unknown word or on one used more often than required.
+                if(required.find(chunk) == required.end()){
+                    break;
+                }
+                seen[chunk] ++;
+                if(seen[chunk] > required[chunk]){
+                    break;
+                }
+            }
+            if(matched == num_words){
+                starts.push_back(begin);
+            }
+        }
+        return starts;
+    }
+};
+
+
+class Solution_2 {
+public:
+    /*
+    Hash map plus two pointers (sliding window).
+
+    For each of the word_len possible alignments, walk s one word at a time
+    and keep a window of consecutive wanted words, none of them used
+    more often than it occurs in `words`.
+    Every word enters the window once (right side) and leaves it at most
+    once (left side), so one alignment costs about 2 * N / word_len word
+    steps and all alignments together about 2 * N.
+    Refer to:
+    https://discuss.leetcode.com/topic/6617/an-o-n-solution-with-detailed-explanation
+    */
+    vector<int> findSubstring(string s, vector<string>& words) {
+        const int text_len = s.size();
+        const int needed = words.size();
+        if(text_len == 0 || needed == 0){
+            return {};
+        }
+
+        const int word_len = words[0].size();
+        unordered_map<string, int> wanted;
+        for(auto &w: words){
+            wanted[w] ++;
+        }
+
+        vector<int> starts;
+        for(int offset=0; offset<word_len; offset++){
+            int left = offset;
+            int in_window = 0;
+            unordered_map<string, int> window;
+            for(int right=offset; right<=text_len-word_len; right+=word_len){
+                string word = s.substr(right, word_len);
+                if(wanted.find(word) == wanted.end()){
+                    // Not a wanted word: restart the window just after it.
+                    window.clear();
+                    in_window = 0;
+                    left = right+word_len;
+                    continue;
+                }
+                window[word] ++;
+                in_window += 1;
+                // One copy too many: drop words from the left until the
+                // surplus copy has left the window.
+                while(window[word] > wanted[word]){
+                    window[s.substr(left, word_len)] --;
+                    left += word_len;
+                    in_window --;
+                }
+                // Full match: record it, then slide past the leftmost word.
+                if(in_window == needed){
+                    starts.push_back(left);
+                    window[s.substr(left, word_len)] --;
+                    in_window --;
+                    left += word_len;
+                }
+            }
+        }
+        return starts;
+    }
+};
+
+
+/*
+""
+[]
+"barfoothefoobarman"
+["foo", "bar"]
+"barfoofoobarthefoobarman"
+["bar","foo","the"]
+*/
diff --git a/Combination/30_SubstringWithConcatenationOfAllWords.py b/Combination/30_SubstringWithConcatenationOfAllWords.py
@@ -0,0 +1,127 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Author: xuezaigds@gmail.com
+# @Last Modified time: 2016-07-12 18:57:50
+
+
+class Solution(object):
+    """ Easy to think, but is very slow.
+
+    Use an unordered_map<string, int> counts to record the expected times of each word and
+    another unordered_map<string, int> seen to record the times we have seen
+    """
+
+    def findSubstring(self, s, words):
+        if not s or not words:
+            return []
+
+        word_cnt = {}
+        for w in words:
+            word_cnt[w] = word_cnt.get(w, 0) + 1
+
+        s_len, word_l = len(s), len(words[0])
+        concatenation_l = len(words) * word_l
+        ans = []
+        for i in range(s_len - concatenation_l + 1):
+            candidate_map = {}
+            j = 0
+            while j < len(words):
+                w = s[i + j * word_l: i + (j + 1) * word_l]
+                if w not in word_cnt:
+                    break
+                candidate_map[w] = candidate_map.get(w, 0) + 1
+                if candidate_map.get(w, 0) > word_cnt[w]:
+                    break
+                j += 1
+
+            if j == len(words):
+                ans.append(i)
+
+        return ans
+
+
+class Solution_2(object):
+    """ Use hashmap and two point.
+
+    Travel all the words combinations to maintain a slicing window.
+    There are wl(word len) times travel, each time n/wl words:
+    mostly 2 times travel for each word:
+        one left side of the window, the other right side of the window
+    So, time complexity O(wl * 2 * N/wl) = O(2N)
+    Refer to:
+    https://discuss.leetcode.com/topic/6617/an-o-n-solution-with-detailed-explanation
+    """
+    def findSubstring(self, s, words):
+        if not s or not words:
+            return []
+
+        word_cnt = {}
+        for w in words:
+            word_cnt[w] = word_cnt.get(w, 0) + 1
+
+        s_len, w_len, cnt = len(s), len(words[0]), len(words)
+        i = 0
+        ans = []
+        while i < w_len:
+            left, count = i, 0
+            candidate_cnt = {}
+            for j in range(i, s_len, w_len):
+                cur_str = s[j: j + w_len]
+                if cur_str in word_cnt:
+                    candidate_cnt[cur_str] = candidate_cnt.get(cur_str, 0) + 1
+                    count += 1
+                    if candidate_cnt[cur_str] <= word_cnt[cur_str]:
+                        pass
+                    else:
+                        # A more word, advance the window left side possiablly
+                        while candidate_cnt[cur_str] > word_cnt[cur_str]:
+                            left_str = s[left: left + w_len]
+                            candidate_cnt[left_str] -= 1
+                            left += w_len
+                            count -= 1
+
+                    # come to a result
+                    if count == cnt:
+                        ans.append(left)
+                        candidate_cnt[s[left:left + w_len]] -= 1
+                        count -= 1
+                        left += w_len
+                # not a valid word, clear the window.
+                else:
+                    candidate_cnt = {}
+                    left = j + w_len
+                    count = 0
+            i += 1
+        return ans
+
+
+class Solution_Fail(object):
+    """ Pythonic way, easy to think, but Time Limit Exceeded.
+
+    Use two hash-map.
+    """
+    def findSubstring(self, s, words):
+        if not s or not words:
+            return []
+        import collections
+        word_cnt = collections.Counter(words)
+        s_len, word_l = len(s), len(words[0])
+        concatenation_l = len(words) * word_l
+        ans = []
+        for i in range(s_len - concatenation_l + 1):
+            candidate_str = s[i:i + concatenation_l]
+            split_str = [candidate_str[j:j + word_l]
+                         for j in range(0, concatenation_l, word_l)]
+            candidate_cnt = collections.Counter(split_str)
+            if not (word_cnt - candidate_cnt):
+                ans.append(i)
+        return ans
+
+"""
+""
+[]
+"barfoothefoobarman"
+["foo", "bar"]
+"barfoofoobarthefoobarman"
+["bar","foo","the"]
+"""
diff --git a/README.md b/README.md
@@ -306,6 +306,7 @@
 
 # Combination
 
+* 030. [Substring with Concatenation of All Words](Combination/30_SubstringWithConcatenationOfAllWords.py)
 * 140. [Word Break II](Combination/140_WordBreakII.py)
 * 146. [LRU Cache](Combination/146_LRUCache.py)
 * 300. [Longest Increasing Subsequence](Combination/300_LongestIncreasingSubsequence.py)
diff --git a/Week04/30.py b/Week04/30.py