Skip to content

Commit

Permalink
重构子串查找,去掉字符串长度参数
Browse files Browse the repository at this point in the history
  • Loading branch information
soulmachine committed Nov 5, 2013
1 parent 2dbcf75 commit bd60fd8
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 78 deletions.
2 changes: 1 addition & 1 deletion C++/chapBFS.tex
Original file line number Diff line number Diff line change
Expand Up @@ -1506,7 +1506,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\subsection{适用场景}
注意,这里的总结是一种经验,一种概率,不是绝对的结论!

\textbf{输入数据}:没有什么特别的特征,不像深搜那样需要输入具有“递归”的性质。如果是树或者图,适用广搜的概率更大。

Expand Down Expand Up @@ -1600,6 +1599,7 @@ \subsection{代码模板}
state_t target;

current.push(start);
visited.insert(start);
while (!current.empty() && !found) {
++level;
while (!current.empty() && !found) {
Expand Down
2 changes: 0 additions & 2 deletions C++/chapDFS.tex
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{适用场景}

注意,这里的总结是一种经验,一种概率,不是绝对的结论!

\textbf{输入数据}:如果是递归数据结构,如单链表,二叉树,集合,则百分之百可以用深搜;如果是非递归数据结构,如一维数组,二维数组,字符串,图,则概率小一些。

\textbf{状态转换图}:树或者图。
Expand Down
74 changes: 38 additions & 36 deletions C++/chapString.tex
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,13 @@ \subsection{KMP算法}
* @brief 计算部分匹配表,即next数组.
*
* @param[in] pattern 模式串
* @param[in] m 模式串的长度
* @param[out] next next数组
* @return 无
*/
void compute_prefix(const char pattern[], const int m, int next[]) {
void compute_prefix(const char *pattern, int next[]) {
int i;
int j = -1;
const int m = strlen(pattern);

next[0] = j;
for (i = 1; i < m; i++) {
Expand All @@ -238,17 +238,19 @@ \subsection{KMP算法}
* @brief KMP算法.
*
* @param[in] text 文本
* @param[in] n 文本的长度
* @param[in] pattern 模式串
* @param[in] m 模式串的长度
* @return 成功则返回第一次匹配的位置,失败则返回-1
*/
int kmp(const char text[], const int n, const char pattern[], const int m) {
int kmp(const char *text, const char *pattern) {
int i;
int j = -1;
const int n = strlen(text);
const int m = strlen(pattern);
if (n == 0 && m == 0) return 0; /* "","" */
if (m == 0) return 0; /* "a","" */
int *next = (int*)malloc(sizeof(int) * m);

compute_prefix(pattern, m, next);
compute_prefix(pattern, next);

for (i = 0; i < n; i++) {
while (j > -1 && pattern[j + 1] != text[i]) j = next[j];
Expand All @@ -269,7 +271,7 @@ \subsection{KMP算法}
char text[] = "ABC ABCDAB ABCDABCDABDE";
char pattern[] = "ABCDABD";
char *ch = text;
int i = kmp(text, strlen(text), pattern, strlen(pattern));
int i = kmp(text, pattern);

if (i >= 0) printf("matched @: %s\n", ch + i);
return 0;
Expand Down Expand Up @@ -301,20 +303,21 @@ \subsection{Boyer-Moore算法}
* @brief 预处理,计算每个字母最靠右的位置.
*
* @param[in] pattern 模式串
* @param[in] m 模式串的长度
* @param[out] right 每个字母最靠右的位置
* @return 无
*/
static void pre_right(const char pattern[], const int m, int right[]) {
static void pre_right(const char *pattern, int right[]) {
int i;
const int m = strlen(pattern);

for (i = 0; i < ASIZE; ++i) right[i] = -1;
for (i = 0; i < m; ++i) right[pattern[i]] = i;
for (i = 0; i < m; ++i) right[(unsigned char)pattern[i]] = i;
}


static void suffixes(const char pattern[], const int m, int suff[]) {
static void suffixes(const char *pattern, int suff[]) {
int f, g, i;
const int m = strlen(pattern);

suff[m - 1] = m;
g = m - 1;
Expand All @@ -336,23 +339,23 @@ \subsection{Boyer-Moore算法}
* @brief 预处理,计算好后缀的后移位置.
*
* @param[in] pattern 模式串
* @param[in] m 模式串的长度
* @param[out] gs 好后缀的后移位置
* @return 无
*/
static void pre_gs(const char pattern[], const int m, int gs[]) {
static void pre_gs(const char pattern[], int gs[]) {
int i, j;
const int m = strlen(pattern);
int *suff = (int*)malloc(sizeof(int) * (m + 1));

suffixes(pattern, m, suff);
suffixes(pattern, suff);

for (i = 0; i < m; ++i) gs[i] = m;

j = 0;
for (i = m - 1; i >= 0; --i) if (suff[i] == i + 1)
for (; j < m - 1 - i; ++j) if (gs[j] == m)
gs[j] = m - 1 - i;
for (i = 0; i <= m - 2; ++i)
for (i = 0; i <= m - 2; ++i)
gs[m - 1 - suff[i]] = m - 1 - i;
free(suff);
}
Expand All @@ -361,20 +364,19 @@ \subsection{Boyer-Moore算法}
* @brief Boyer-Moore算法.
*
* @param[in] text 文本
* @param[in] n 文本的长度
* @param[in] pattern 模式串
* @param[in] m 模式串的长度
* @return 成功则返回第一次匹配的位置,失败则返回-1
*/
int boyer_moore(const char text[], const int n,
const char pattern[], const int m) {
int boyer_moore(const char *text, const char *pattern) {
int i, j;
int right[ASIZE]; /* bad-character shift */
const int n = strlen(text);
const int m = strlen(pattern);
int *gs = (int*)malloc(sizeof(int) * (m + 1)); /* good-suffix shift */

/* Preprocessing */
pre_right(pattern, m, right);
pre_gs(pattern, m, gs);
pre_right(pattern, right);
pre_gs(pattern, gs);

/* Searching */
j = 0;
Expand All @@ -387,8 +389,8 @@ \subsection{Boyer-Moore算法}
free(gs);
return j;
} else {
const int max = gs[i] > right[text[i + j]] - m + 1 + i ?
gs[i] : i - right[text[i + j]];
const int max = gs[i] > right[(unsigned char)text[i + j]] -
m + 1 + i ? gs[i] : i - right[(unsigned char)text[i + j]];
j += max;
}
}
Expand All @@ -398,9 +400,9 @@ \subsection{Boyer-Moore算法}


int main() {
const char text[]="HERE IS A SIMPLE EXAMPLE";
const char pattern[] = "EXAMPLE";
const int pos = boyer_moore(text, strlen(text), pattern, strlen(pattern));
const char *text="HERE IS A SIMPLE EXAMPLE";
const char *pattern = "EXAMPLE";
const int pos = boyer_moore(text, pattern);
printf("%d\n", pos); /* 17 */
return 0;
}
Expand Down Expand Up @@ -442,7 +444,7 @@ \subsection{Rabin-Karp算法}
* @param[in] RM R^(M-1) % Q
* @return 起始于位置i+1的M个字符的子字符串所对应的哈希值
*/
static long rehash(const long h, const char first, const char next,
static long rehash(const long h, const char first, const char next,
const long RM) {
long newh = (h + Q - RM * first % Q) % Q;
newh = (newh * R + next) % Q;
Expand All @@ -451,13 +453,12 @@ \subsection{Rabin-Karp算法}

/*
* @brief 用蒙特卡洛算法,判断两个字符串是否相等.
*
*
* @param[in] pattern 模式串
* @param[in] substring 原始文本长度为M的子串
* @param[in] M 模式串的长度,也是substring的长度
* @return 两个字符串相同,返回1,否则返回0
*/
static int check(const char pattern[], const char substring[], const int M) {
static int check(const char *pattern, const char substring[]) {
return 1;
}

Expand All @@ -470,17 +471,18 @@ \subsection{Rabin-Karp算法}
* @param[in] m 模式串的长度
* @return 成功则返回第一次匹配的位置,失败则返回-1
*/
int rabin_karp(const char text[], const int n,
const char pattern[], const int m) {
int rabin_karp(const char *text, const char *pattern) {
int i;
const int n = strlen(text);
const int m = strlen(pattern);
const long pattern_hash = hash(pattern, m);
long text_hash = hash(text, m);
int RM = 1;
for (i = 0; i < m - 1; ++i) RM = (RM * R) % Q;

for (i = 0; i <= n - m; ++i) {
if (text_hash == pattern_hash) {
if (check(pattern, &text[i], m)) return i;
if (check(pattern, &text[i])) return i;
}
text_hash = rehash(text_hash, text[i], text[i + m], RM);
}
Expand All @@ -489,9 +491,9 @@ \subsection{Rabin-Karp算法}


int main() {
const char text[]="HERE IS A SIMPLE EXAMPLE";
const char pattern[] = "EXAMPLE";
const int pos = rabin_karp(text, strlen(text), pattern, strlen(pattern));
const char *text = "HERE IS A SIMPLE EXAMPLE";
const char *pattern = "EXAMPLE";
const int pos = rabin_karp(text, pattern);
printf("%d\n", pos); /* 17 */
return 0;
}
Expand Down
1 change: 0 additions & 1 deletion C/chapBFS.tex
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\subsection{适用场景}
注意,这里的总结是一种经验,一种概率,不是绝对的结论!

\textbf{输入数据}:没有什么特别的特征,不像深搜那样需要输入具有“递归”的性质。如果是树或者图,适用广搜的概率更大。

Expand Down
2 changes: 0 additions & 2 deletions C/chapDFS.tex
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,6 @@ \section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{适用场景}

注意,这里的总结是一种经验,一种概率,不是绝对的结论!

\textbf{输入数据}:如果是递归数据结构,如单链表,二叉树,集合,则百分之百可以用深搜;如果是非递归数据结构,如一维数组,二维数组,字符串,图,则概率小一些。

\textbf{状态转换图}:树或者图。
Expand Down
Loading

0 comments on commit bd60fd8

Please sign in to comment.