重构子串查找，去掉字符串长度参数

soulmachine · Nov 5, 2013 · bd60fd8 · bd60fd8
1 parent 2dbcf75
commit bd60fd8
Show file tree

Hide file tree

Showing 8 changed files with 77 additions and 78 deletions.
diff --git a/C++/chapBFS.tex b/C++/chapBFS.tex
@@ -1506,7 +1506,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 
 \subsection{适用场景}
-注意，这里的总结是一种经验，一种概率，不是绝对的结论！
 
 \textbf{输入数据}：没什么特征，不像深搜，需要有“递归”的性质。如果是树或者图，概率更大。
 
@@ -1600,6 +1599,7 @@ \subsection{代码模板}
     state_t target;
 
     current.push(start);
+    visited.insert(start);
     while (!current.empty() && !found) {
         ++level;
         while (!current.empty() && !found) {

diff --git a/C++/chapDFS.tex b/C++/chapDFS.tex
@@ -557,8 +557,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \subsection{适用场景}
 
-注意，这里的总结是一种经验，一种概率，不是绝对的结论！
-
 \textbf{输入数据}：如果是递归数据结构，如单链表，二叉树，集合，则百分之百可以用深搜；如果是非递归数据结构，如一维数组，二维数组，字符串，图，则概率小一些。
 
 \textbf{状态转换图}：树或者图。

diff --git a/C++/chapString.tex b/C++/chapString.tex
@@ -217,13 +217,13 @@ \subsection{KMP算法}
  * @brief 计算部分匹配表，即next数组.
  *
  * @param[in] pattern 模式串
- * @param[in] m 模式串的长度
  * @param[out] next next数组
  * @return 无
  */
-void compute_prefix(const char pattern[], const int m, int next[]) {
+void compute_prefix(const char *pattern, int next[]) {
     int i;
     int j = -1;
+    const int m = strlen(pattern);
 
     next[0] = j;
     for (i = 1; i < m; i++) {
@@ -238,17 +238,19 @@ \subsection{KMP算法}
  * @brief KMP算法.
  *
  * @param[in] text 文本
- * @param[in] n 文本的长度
  * @param[in] pattern 模式串
- * @param[in] m 模式串的长度
  * @return 成功则返回第一次匹配的位置，失败则返回-1
  */
-int kmp(const char text[], const int n, const char pattern[], const int m) {
+int kmp(const char *text, const char *pattern) {
     int i;
     int j = -1;
+    const int n = strlen(text);
+    const int m = strlen(pattern);
+    if (n == 0 && m == 0) return 0; /* "","" */
+    if (m == 0) return 0;  /* "a","" */
     int *next = (int*)malloc(sizeof(int) * m);
 
-    compute_prefix(pattern, m, next);
+    compute_prefix(pattern, next);
 
     for (i = 0; i < n; i++) {
         while (j > -1 && pattern[j + 1] != text[i]) j = next[j];
@@ -269,7 +271,7 @@ \subsection{KMP算法}
     char text[] = "ABC ABCDAB ABCDABCDABDE";
     char pattern[] = "ABCDABD";
     char *ch = text;
-    int i = kmp(text, strlen(text), pattern, strlen(pattern));
+    int i = kmp(text, pattern);
 
     if (i >= 0) printf("matched @: %s\n", ch + i);
     return 0;
@@ -301,20 +303,21 @@ \subsection{Boyer-Moore算法}
  * @brief 预处理，计算每个字母最靠右的位置.
  *
  * @param[in] pattern 模式串
- * @param[in] m 模式串的长度
  * @param[out] right 每个字母最靠右的位置
  * @return 无
  */
-static void pre_right(const char pattern[], const int m, int right[]) {
+static void pre_right(const char *pattern, int right[]) {
     int i;
+    const int m = strlen(pattern);
 
     for (i = 0; i < ASIZE; ++i) right[i] = -1;
-    for (i = 0; i < m; ++i) right[pattern[i]] = i;
+    for (i = 0; i < m; ++i) right[(unsigned char)pattern[i]] = i;
 }
 
 
-static void suffixes(const char pattern[], const int m, int suff[]) {
+static void suffixes(const char *pattern, int suff[]) {
     int f, g, i;
+    const int m = strlen(pattern);
 
     suff[m - 1] = m;
     g = m - 1;
@@ -336,23 +339,23 @@ \subsection{Boyer-Moore算法}
  * @brief 预处理，计算好后缀的后移位置.
  *
  * @param[in] pattern 模式串
- * @param[in] m 模式串的长度
  * @param[out] gs 好后缀的后移位置
  * @return 无
  */
-static void pre_gs(const char pattern[], const int m, int gs[]) {
+static void pre_gs(const char pattern[], int gs[]) {
     int i, j;
+    const int m = strlen(pattern);
     int *suff = (int*)malloc(sizeof(int) * (m + 1));
 
-    suffixes(pattern, m, suff);
+    suffixes(pattern, suff);
 
     for (i = 0; i < m; ++i) gs[i] = m;
 
     j = 0;
     for (i = m - 1; i >= 0; --i) if (suff[i] == i + 1)
         for (; j < m - 1 - i; ++j) if (gs[j] == m)
             gs[j] = m - 1 - i;
-    for (i = 0; i <= m - 2; ++i) 
+    for (i = 0; i <= m - 2; ++i)
         gs[m - 1 - suff[i]] = m - 1 - i;
     free(suff);
 }
@@ -361,20 +364,19 @@ \subsection{Boyer-Moore算法}
  * @brief Boyer-Moore算法.
  *
  * @param[in] text 文本
- * @param[in] n 文本的长度
  * @param[in] pattern 模式串
- * @param[in] m 模式串的长度
  * @return 成功则返回第一次匹配的位置，失败则返回-1
  */
-int boyer_moore(const char text[], const int n, 
-                const char pattern[], const int m) {
+int boyer_moore(const char *text, const char *pattern) {
     int i, j;
     int right[ASIZE];  /* bad-character shift */
+    const int n = strlen(text);
+    const int m = strlen(pattern);
     int *gs = (int*)malloc(sizeof(int) * (m + 1));  /* good-suffix shift */
 
     /* Preprocessing */
-    pre_right(pattern, m, right);
-    pre_gs(pattern, m, gs);
+    pre_right(pattern, right);
+    pre_gs(pattern, gs);
 
     /* Searching */
     j = 0;
@@ -387,8 +389,8 @@ \subsection{Boyer-Moore算法}
             free(gs);
             return j;
         } else {
-            const int max = gs[i] > right[text[i + j]] - m + 1 + i ?
-                gs[i] : i - right[text[i + j]];
+            const int max = gs[i] > right[(unsigned char)text[i + j]] -
+                    m + 1 + i ? gs[i] : i - right[(unsigned char)text[i + j]];
             j += max;
         }
     }
@@ -398,9 +400,9 @@ \subsection{Boyer-Moore算法}
 
 
 int main() {
-    const char text[]="HERE IS A SIMPLE EXAMPLE";
-    const char pattern[] = "EXAMPLE";
-    const int pos = boyer_moore(text, strlen(text), pattern, strlen(pattern));
+    const char *text="HERE IS A SIMPLE EXAMPLE";
+    const char *pattern = "EXAMPLE";
+    const int pos = boyer_moore(text, pattern);
     printf("%d\n", pos); /* 17 */
     return 0;
 }
@@ -442,7 +444,7 @@ \subsection{Rabin-Karp算法}
  * @param[in] RM R^(M-1) % Q
  * @return 起始于位置i+1的M个字符的子字符串所对应的哈希值
  */
-static long rehash(const long h, const char first, const char next, 
+static long rehash(const long h, const char first, const char next,
                    const long RM) {
     long newh = (h + Q - RM * first % Q) % Q;
     newh = (newh * R + next) % Q;
@@ -451,13 +453,12 @@ \subsection{Rabin-Karp算法}
 
 /*
  * @brief 用蒙特卡洛算法，判断两个字符串是否相等.
- * 
+ *
  * @param[in] pattern 模式串
  * @param[in] substring 原始文本长度为M的子串
- * @param[in] M 模式串的长度，也是substring的长度
  * @return 两个字符串相同，返回1，否则返回0
  */
-static int check(const char pattern[], const char substring[], const int M) {
+static int check(const char *pattern, const char substring[]) {
     return 1;
 }
 
@@ -470,17 +471,18 @@ \subsection{Rabin-Karp算法}
  * @param[in] m 模式串的长度
  * @return 成功则返回第一次匹配的位置，失败则返回-1
  */
-int rabin_karp(const char text[], const int n, 
-               const char pattern[], const int m) {
+int rabin_karp(const char *text, const char *pattern) {
     int i;
+    const int n = strlen(text);
+    const int m = strlen(pattern);
     const long pattern_hash = hash(pattern, m);
     long text_hash = hash(text, m);
     int RM = 1;
     for (i = 0; i < m - 1; ++i) RM = (RM * R) % Q;
 
     for (i = 0; i <= n - m; ++i) {
         if (text_hash == pattern_hash) {
-            if (check(pattern, &text[i], m)) return i;
+            if (check(pattern, &text[i])) return i;
         }
         text_hash = rehash(text_hash, text[i], text[i + m], RM);
     }
@@ -489,9 +491,9 @@ \subsection{Rabin-Karp算法}
 
 
 int main() {
-    const char text[]="HERE IS A SIMPLE EXAMPLE";
-    const char pattern[] = "EXAMPLE";
-    const int pos = rabin_karp(text, strlen(text), pattern, strlen(pattern));
+    const char *text = "HERE IS A SIMPLE EXAMPLE";
+    const char *pattern = "EXAMPLE";
+    const int pos = rabin_karp(text, pattern);
     printf("%d\n", pos); /* 17 */
     return 0;
 }

diff --git a/C/chapBFS.tex b/C/chapBFS.tex
@@ -1576,7 +1576,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 
 \subsection{适用场景}
-注意，这里的总结是一种经验，一种概率，不是绝对的结论！
 
 \textbf{输入数据}：没什么特征，不像深搜，需要有“递归”的性质。如果是树或者图，概率更大。
 

diff --git a/C/chapDFS.tex b/C/chapDFS.tex
@@ -557,8 +557,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \subsection{适用场景}
 
-注意，这里的总结是一种经验，一种概率，不是绝对的结论！
-
 \textbf{输入数据}：如果是递归数据结构，如单链表，二叉树，集合，则百分之百可以用深搜；如果是非递归数据结构，如一维数组，二维数组，字符串，图，则概率小一些。
 
 \textbf{状态转换图}：树或者图。
Original file line number	Diff line number	Diff line change
Expand Up		@@ -1576,7 +1576,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


		\subsection{适用场景}
		注意，这里的总结是一种经验，一种概率，不是绝对的结论！

		\textbf{输入数据}：没什么特征，不像深搜，需要有“递归”的性质。如果是树或者图，概率更大。

Expand Down