C++/chapBFS.tex

\chapter{广度优先搜索}
当题目看不出任何规律，既不能用分治，贪心，也不能用动规时，这时候万能方法——搜索，
就派上用场了。搜索分为广搜和深搜，广搜里面又有普通广搜，双向广搜，A*搜索等。
深搜里面又有普通深搜，回溯法等。

广搜和深搜非常类似（除了在扩展节点这部分不一样），二者有相同的框架，如何表示状态？
如何扩展状态？如何判重？尤其是判重，解决了这个问题，基本上整个问题就解决了。


\section{Word Ladder} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\label{sec:word-ladder}


\subsubsection{描述}
Given two words (start and end), and a dictionary, find the length of shortest transformation sequence from start to end, such that:
\begindot
\item Only one letter can be changed at a time
\item Each intermediate word must exist in the dictionary
\myenddot

For example, Given:

\begin{Code}
start = "hit"
end = "cog"
dict = ["hot","dot","dog","lot","log"]
\end{Code}
As one shortest transformation is \code{"hit" -> "hot" -> "dot" -> "dog" -> "cog"}, return its length $5$.

Note:
\begindot
\item Return 0 if there is no such transformation sequence.
\item All words have the same length.
\item All words contain only lowercase alphabetic characters.
\myenddot


\subsubsection{分析}


\subsubsection{代码}
\begin{Code}
//LeetCode, Word Ladder
// 时间复杂度O(n)，空间复杂度O(n)
class Solution {
public:
    int ladderLength(const string& start, const string &end,
            const unordered_set<string> &dict) {
        queue<string> current, next;    // 当前层，下一层
        unordered_set<string> visited;  // 判重

        int level = 0;  // 层次
        bool found = false;

        auto state_is_target = [&](const string &s) {return s == end;};
        auto state_extend = [&](const string &s) {
            vector<string> result;

            for (size_t i = 0; i < s.size(); ++i) {
                string new_word(s);
                for (char c = 'a'; c <= 'z'; c++) {
                    if (c == new_word[i]) continue;

                    swap(c, new_word[i]);

                    if ((dict.count(new_word) > 0 || new_word == end) &&
                            !visited.count(new_word)) {
                        result.push_back(new_word);
                        visited.insert(new_word);
                    }
                    swap(c, new_word[i]); // 恢复该单词
                }
            }

            return result;
        };

        current.push(start);
        while (!current.empty() && !found) {
            ++level;
            while (!current.empty() && !found) {
                const string str = current.front();
                current.pop();

                const auto& new_states = state_extend(str);
                for (const auto& state : new_states) {
                    next.push(state);
                    if (state_is_target(state)) {
                        found = true; //找到了
                        break;
                    }
                }
            }
            swap(next, current);
        }
        if (found) return level + 1;
        else return 0;
    }
};
\end{Code}


\subsubsection{相关题目}

\begindot
\item Word Ladder II，见 \S \ref{sec:word-ladder-ii}
\myenddot


\section{Word Ladder II} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\label{sec:word-ladder-ii}


\subsubsection{描述}
Given two words (start and end), and a dictionary, find all shortest transformation sequence(s) from start to end, such that:
\begindot
\item Only one letter can be changed at a time
\item Each intermediate word must exist in the dictionary
\myenddot

For example, Given:
\begin{Code}
start = "hit"
end = "cog"
dict = ["hot","dot","dog","lot","log"]
\end{Code}
Return
\begin{Code}
[
    ["hit","hot","dot","dog","cog"],
    ["hit","hot","lot","log","cog"]
]
\end{Code}

Note:
\begindot
\item All words have the same length.
\item All words contain only lowercase alphabetic characters.
\myenddot


\subsubsection{分析}
跟 Word Ladder比，这题是求路径本身，不是路径长度，也是BFS，略微麻烦点。

这题跟普通的广搜有很大的不同，就是要输出所有路径，因此在记录前驱和判重地方与普通广搜略有不同。


\subsubsection{代码}

\begin{Code}
//LeetCode, Word Ladder II
// 时间复杂度O(n)，空间复杂度O(n)
class Solution {
public:
    vector<vector<string> > findLadders(string start, string end,
            const unordered_set<string> &dict) {
        unordered_set<string> current, next;  // 当前层，下一层，用集合是为了去重
        unordered_set<string> visited; // 判重
        unordered_map<string, vector<string> > father; // 树

        bool found = false;

        auto state_is_target = [&](const string &s) {return s == end;};
        auto state_extend = [&](const string &s) {
            unordered_set<string> result;

            for (size_t i = 0; i < s.size(); ++i) {
                string new_word(s);
                for (char c = 'a'; c <= 'z'; c++) {
                    if (c == new_word[i]) continue;

                    swap(c, new_word[i]);

                    if ((dict.count(new_word) > 0 || new_word == end) &&
                             !visited.count(new_word)) {
                        result.insert(new_word);
                    }
                    swap(c, new_word[i]); // 恢复该单词
                }
            }

            return result;
        };

        current.insert(start);
        while (!current.empty() && !found) {
            // 先将本层全部置为已访问，防止同层之间互相指向
            for (const auto& word : current)
                visited.insert(word);
            for (const auto& word : current) {
                const auto new_states = state_extend(word);
                for (const auto &state : new_states) {
                    if (state_is_target(state)) found = true;
                    next.insert(state);
                    father[state].push_back(word);
                    // visited.insert(state); // 移动到最上面了
                }
            }

            current.clear();
            swap(current, next);
        }
        vector<vector<string> > result;
        if (found) {
            vector<string> path;
            gen_path(father, path, start, end, result);
        }
        return result;
    }
private:
    void gen_path(unordered_map<string, vector<string> > &father,
            vector<string> &path, const string &start, const string &word,
            vector<vector<string> > &result) {
        path.push_back(word);
        if (word == start) {
            result.push_back(path);
            reverse(result.back().begin(), result.back().end());
        } else {
            for (const auto& f : father[word]) {
                gen_path(father, path, start, f, result);
            }
        }
        path.pop_back();
    }
};
\end{Code}


\subsubsection{相关题目}

\begindot
\item Word Ladder，见 \S \ref{sec:word-ladder}
\myenddot


\section{Surrounded Regions} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\label{sec:surrounded-regions}


\subsubsection{描述}
Given a 2D board containing \fn{'X'} and \fn{'O'}, capture all regions surrounded by \fn{'X'}.

A region is captured by flipping all \fn{'O'}s into \fn{'X'}s in that surrounded region .

For example,
\begin{Code}
X X X X
X O O X
X X O X
X O X X
\end{Code}

After running your function, the board should be:
\begin{Code}
X X X X
X X X X
X X X X
X O X X
\end{Code}


\subsubsection{分析}
广搜。从上下左右四个边界往里走，凡是能碰到的\fn{'O'}，都是跟边界接壤的，应该保留。


\subsubsection{代码}
\begin{Code}
// LeetCode, Surrounded Regions
// BFS，时间复杂度O(n)，空间复杂度O(n)
class Solution {
public:
    void solve(vector<vector<char>> &board) {
        if (board.empty()) return;

        const int m = board.size();
        const int n = board[0].size();
        for (int i = 0; i < n; i++) {
            bfs(board, 0, i);
            bfs(board, m - 1, i);
        }
        for (int j = 1; j < m - 1; j++) {
            bfs(board, j, 0);
            bfs(board, j, n - 1);
        }
        for (int i = 0; i < m; i++)
            for (int j = 0; j < n; j++)
                if (board[i][j] == 'O')
                    board[i][j] = 'X';
                else if (board[i][j] == '+')
                    board[i][j] = 'O';
    }
private:
    void bfs(vector<vector<char>> &board, int i, int j) {
        typedef pair<int, int> state_t;
        queue<state_t> q;
        const int m = board.size();
        const int n = board[0].size();

        auto is_valid = [&](const state_t &s) {
            const int x = s.first;
            const int y = s.second;
            if (x < 0 || x >= m || y < 0 || y >= n || board[x][y] != 'O')
                return false;
            return true;
        };

        auto state_extend = [&](const state_t &s) {
            vector<state_t> result;
            const int x = s.first;
            const int y = s.second;
            // 上下左右
            const state_t new_states[4] = {{x-1,y}, {x+1,y},
                    {x,y-1}, {x,y+1}};
            for (int k = 0; k < 4;  ++k) {
                if (is_valid(new_states[k])) {
                    // 既有标记功能又有去重功能
                    board[new_states[k].first][new_states[k].second] = '+';
                    result.push_back(new_states[k]);
                }
            }

            return result;
        };

        state_t start = { i, j };
        if (is_valid(start)) {
            board[i][j] = '+';
            q.push(start);
        }
        while (!q.empty()) {
            auto cur = q.front();
            q.pop();
            auto new_states = state_extend(cur);
            for (auto s : new_states) q.push(s);
        }
    }
};
\end{Code}


\subsubsection{相关题目}

\begindot
\item 无
\myenddot


\section{小结} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\label{sec:bfs-template}


\subsection{适用场景}

\textbf{输入数据}：没什么特征，不像深搜，需要有“递归”的性质。如果是树或者图，概率更大。

\textbf{状态转换图}：树或者图。

\textbf{求解目标}：求最短。


\subsection{思考的步骤}
\begin{enumerate}
\item 是求路径长度，还是路径本身（或动作序列）？
    \begin{enumerate}
    \item 如果是求路径长度，则状态里面要存路径长度（或双队列+一个全局变量）
    \item 如果是求路径本身或动作序列
        \begin{enumerate}
            \item 要用一棵树存储宽搜过程中的路径
            \item 是否可以预估状态个数的上限？能够预估状态总数，则开一个大数组，用树的双亲表示法；如果不能预估状态总数，则要使用一棵通用的树。这一步也是第4步的必要不充分条件。
        \end{enumerate}
    \end{enumerate}

\item 如何表示状态？即一个状态需要存储哪些些必要的数据，才能够完整提供如何扩展到下一步状态的所有信息。一般记录当前位置或整体局面。

\item 如何扩展状态？这一步跟第2步相关。状态里记录的数据不同，扩展方法就不同。对于固定不变的数据结构（一般题目直接给出，作为输入数据），如二叉树，图等，扩展方法很简单，直接往下一层走，对于隐式图，要先在第1步里想清楚状态所带的数据，想清楚了这点，那如何扩展就很简单了。

\item 关于判重，状态是否存在完美哈希方案？即将状态一一映射到整数，互相之间不会冲突。
    \begin{enumerate}
    \item 如果不存在，则需要使用通用的哈希表（自己实现或用标准库，例如\fn{unordered_set}）来判重；自己实现哈希表的话，如果能够预估状态个数的上限，则可以开两个数组，head和next，表示哈希表，参考第 \S \ref{subsec:eightDigits}节方案2。
    \item 如果存在，则可以开一个大布尔数组，作为哈希表来判重，且此时可以精确计算出状态总数，而不仅仅是预估上限。
    \end{enumerate}

\item 目标状态是否已知？如果题目已经给出了目标状态，可以带来很大便利，这时候可以从起始状态出发，正向广搜；也可以从目标状态出发，逆向广搜；也可以同时出发，双向广搜。
\end{enumerate}


\subsection{代码模板}
广搜需要一个队列，用于一层一层扩展，一个hashset，用于判重，一棵树（只求长度时不需要），用于存储整棵树。

对于队列，可以用\fn{queue}，也可以把\fn{vector}当做队列使用。当求长度时，有两种做法：
\begin{enumerate}
\item 只用一个队列，但在状态结构体\fn{state_t}里增加一个整数字段\fn{step}，表示走到当前状态用了多少步，当碰到目标状态，直接输出\fn{step}即可。这个方案，可以很方便的变成A*算法，把队列换成优先队列即可。
\item 用两个队列，\fn{current, next}，分别表示当前层次和下一层，另设一个全局整数\fn{level}，表示层数（也即路径长度），当碰到目标状态，输出\fn{level}即可。这个方案，状态可以少一个字段，节省内存。
\end{enumerate}

对于hashset，如果有完美哈希方案，用布尔数组(\fn{bool visited[STATE_MAX]}或\fn{vector<bool> visited(STATE_MAX, false)})来表示；如果没有，可以用STL里的\fn{set}或\fn{unordered_set}。

对于树，如果用STL，可以用\fn{unordered_map<state_t, state_t > father}表示一颗树，代码非常简洁。如果能够预估状态总数的上限（设为STATE_MAX），可以用数组(\fn{state_t nodes[STATE_MAX]})，即树的双亲表示法来表示树，效率更高，当然，需要写更多代码。


\subsubsection{双队列的写法}
\begin{Codex}[label=bfs_template1.cpp]
/** 状态 */
struct state_t {
    int data1;  /** 状态的数据，可以有多个字段. */
    int data2;  /** 状态的数据，可以有多个字段. */
    // dataN;   /** 其他字段 */
    int action; /** 由父状态移动到本状态的动作，求动作序列时需要. */
    int count;  /** 所花费的步骤数（也即路径长度-1），求路径长度时需要；
                    不过，采用双队列时不需要本字段，只需全局设一个整数 */
    bool operator==(const state_t &other) const {
        return true;  // 根据具体问题实现
    }
};

// 定义hash函数

// 方法1：模板特化，当hash函数只需要状态本身，不需要其他数据时，用这个方法比较简洁
namespace std {
template<> struct hash<state_t> {
    size_t operator()(const state_t & x) const {
        return 0; // 根据具体问题实现
    }
};
}

// 方法2：函数对象，如果hash函数需要运行时数据，则用这种方法
class Hasher {
public:
    Hasher(int _m) : m(_m) {};
    size_t operator()(const state_t &s) const {
        return 0; // 根据具体问题实现
    }
private:
    int m; // 存放外面传入的数据
};


/**
 * @brief 反向生成路径.
 * @param[in] father 树
 * @param[in] target 目标节点
 * @return 从起点到target的路径
 */
template<typename state_t>
vector<state_t> gen_path(const unordered_map<state_t, state_t> &father,
        const state_t &target) {
    vector<state_t> path;
    path.push_back(target);

    for (state_t cur = target; father.find(cur) != father.end(); 
            cur = father.at(cur))
        path.push_back(cur);

    reverse(path.begin(), path.end());

    return path;
}

/**
 * @brief 广搜.
 * @param[in] state_t 状态，如整数，字符串，一维数组等
 * @param[in] start 起点
 * @param[in] grid 输入数据
 * @return 从起点到目标状态的一条最短路径
 */
template<typename state_t>
vector<state_t> bfs(const state_t &start, const vector<vector<int>> &grid) {
    queue<state_t> next, current; // 当前层，下一层
    unordered_set<state_t> visited; // 判重
    unordered_map<state_t, state_t> father; // 树

    int level = 0;  // 层次
    bool found = false; // 是否找到目标
    state_t target; // 符合条件的目标状态

    // 判断当前状态是否为所求目标
    auto state_is_target = [&](const state_t &s) {return true; };
    // 扩展当前状态
    auto state_extend = [&](const state_t &s) {
        vector<state_t> result;
        // ...
        return result;
    };

    current.push(start);
    visited.insert(start);
    while (!current.empty() && !found) {
        ++level;
        while (!current.empty() && !found) {
            const state_t state = current.front();
            current.pop();
            vector<state_t> new_states = state_extend(state);
            for (auto iter = new_states.cbegin();
                    iter != new_states.cend() && ! found; ++iter) {
                const state_t new_state(*iter);

                if (state_is_target(new_state)) {
                    found = true; //找到了
                    target = new_state;
                    father[new_state] = state;
                    break;
                }

                next.push(new_state);
                // visited.insert(new_state); 必须放到 state_extend()里
                father[new_state] = state;
            }
        }
        swap(next, current); //!!! 交换两个队列
    }

    if (found) {
        return gen_path(father, target);
        //return level + 1;
    } else {
        return vector<state_t>();
        //return 0;
    }
}
\end{Codex}


\subsubsection{只用一个队列的写法}
双队列的写法，当求路径长度时，不需要在状态里设置一个\fn{count}字段记录路径长度，只需全局设置一个整数\fn{level}，比较节省内存；只用一个队列的写法，当求路径长度时，需要在状态里设置一个\fn{count}字段，不过，这种写法有一个好处 —— 可以很容易的变为A*算法，把\fn{queue}替换为\fn{priority_queue}即可。

\begin{Codex}[label=bfs_template2.cpp]
// 与模板1相同的部分，不再重复
// ...

/**
 * @brief 广搜.
 * @param[in] state_t 状态，如整数，字符串，一维数组等
 * @param[in] start 起点
 * @param[in] grid 输入数据
 * @return 从起点到目标状态的一条最短路径
 */
template<typename state_t>
vector<state_t> bfs(state_t &start, const vector<vector<int>> &grid) {
    queue<state_t> q; // 队列
    unordered_set<state_t> visited; // 判重
    unordered_map<state_t, state_t> father; // 树

    int level = 0;  // 层次
    bool found = false; // 是否找到目标
    state_t target; // 符合条件的目标状态

    // 判断当前状态是否为所求目标
    auto state_is_target = [&](const state_t &s) {return true; };
    // 扩展当前状态
    auto state_extend = [&](const state_t &s) {
        vector<state_t> result;
        // ...
        return result;
    };

    start.count = 0;
    q.push(start);
    visited.insert(start);
    while (!q.empty() && !found) {
        const state_t state = q.front();
        q.pop();
        vector<state_t> new_states = state_extend(state);
        for (auto iter = new_states.cbegin();
                iter != new_states.cend() && ! found; ++iter) {
            const state_t new_state(*iter);

            if (state_is_target(new_state)) {
                found = true; //找到了
                target = new_state;
                father[new_state] = state;
                break;
            }

            q.push(new_state);
            // visited.insert(new_state); 必须放到 state_extend()里
            father[new_state] = state;
        }
    }

    if (found) {
        return gen_path(father, target);
        //return level + 1;
    } else {
        return vector<state_t>();
        //return 0;
    }
}
\end{Codex}