Skip to content

Commit

Permalink
feat: Add the section of Top-K problem (krahets#551)
Browse files Browse the repository at this point in the history
* Add the section of Top-K problem

* Update my_heap.py

* Update build_heap.md

* Update my_heap.py
  • Loading branch information
krahets authored Jun 12, 2023
1 parent 9de5d0b commit a111b94
Show file tree
Hide file tree
Showing 22 changed files with 266 additions and 16 deletions.
3 changes: 2 additions & 1 deletion codes/cpp/chapter_heap/heap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,6 @@ int main() {
priority_queue<int, vector<int>, greater<int>> minHeap(input.begin(), input.end());
cout << "输入列表并建立小顶堆后" << endl;
printHeap(minHeap);

return 0;
}
}
2 changes: 2 additions & 0 deletions codes/cpp/chapter_heap/my_heap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,6 @@ int main() {
/* 判断堆是否为空 */
bool isEmpty = maxHeap.empty();
cout << "\n堆是否为空 " << isEmpty << endl;

return 0;
}
37 changes: 37 additions & 0 deletions codes/cpp/chapter_heap/top_k.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* File: top_k.cpp
* Created Time: 2023-06-12
* Author: Krahets ([email protected])
*/

#include "../utils/common.hpp"

/* 基于堆查找数组中最大的 k 个元素 */
/* Find the k largest elements of an array using a min-heap.
 *
 * The heap holds at most k candidates and its top is the smallest of them, so a
 * new element only needs to be compared against the top to decide whether it
 * belongs to the current k largest.
 *
 * nums: input array; it is only read, never modified.
 * k:    number of largest elements requested. Values greater than nums.size()
 *       are clamped to nums.size(); values <= 0 yield an empty heap.
 * Returns a min-heap holding the min(k, nums.size()) largest elements. */
std::priority_queue<int, std::vector<int>, std::greater<int>> topKHeap(std::vector<int> &nums, int k) {
    std::priority_queue<int, std::vector<int>, std::greater<int>> heap;
    // Nothing to select; returning here also guarantees top() is never called on an empty heap
    if (k <= 0 || nums.empty()) {
        return heap;
    }
    // Clamp k so the initial fill never reads past the end of nums
    const std::vector<int>::size_type total = nums.size();
    std::vector<int>::size_type limit = static_cast<std::vector<int>::size_type>(k);
    if (limit > total) {
        limit = total;
    }
    // Push the first `limit` elements into the heap
    for (std::vector<int>::size_type i = 0; i < limit; i++) {
        heap.push(nums[i]);
    }
    // From the next element onward, keep the heap size fixed at `limit`
    for (std::vector<int>::size_type i = limit; i < total; i++) {
        // If the current element beats the smallest candidate, replace that candidate
        if (nums[i] > heap.top()) {
            heap.pop();
            heap.push(nums[i]);
        }
    }
    return heap;
}

// Driver Code
int main() {
vector<int> nums = {1, 7, 6, 3, 2};
int k = 3;

priority_queue<int, vector<int>, greater<int>> res = topKHeap(nums, k);
cout << "最大的 " << k << " 个元素为: ";
printHeap(res);

return 0;
}
20 changes: 10 additions & 10 deletions codes/cpp/utils/print_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,6 @@
#include <sstream>
#include <string>

/* Expose the underlying storage of the priority_queue container.
 * std::priority_queue keeps its elements in a protected member `c` of type S.
 * A class derived from the queue is allowed to name that member, so the local
 * HackedQueue forms a pointer-to-member to `c` and applies it to `pq`.
 * Returns a reference to the live container inside `pq`, not a copy. */
template <typename T, typename S, typename C> S &Container(priority_queue<T, S, C> &pq) {
// Local helper type whose only purpose is to gain access to the protected member `c`
struct HackedQueue : private priority_queue<T, S, C> {
static S &Container(priority_queue<T, S, C> &pq) {
// &HackedQueue::c is a pointer to the base-class member `c`; apply it to pq
return pq.*&HackedQueue::c;
}
};
return HackedQueue::Container(pq);
}

/* Find an element in a vector */
template <typename T> int vecFind(const vector<T> &vec, T ele) {
int j = INT_MAX;
Expand Down Expand Up @@ -217,6 +207,16 @@ template <typename TKey, typename TValue> void printHashMap(unordered_map<TKey,
}
}

/* Expose the underlying storage of the priority_queue container.
 * std::priority_queue keeps its elements in a protected member `c` of type S.
 * A class derived from the queue is allowed to name that member, so the local
 * HackedQueue forms a pointer-to-member to `c` and applies it to `pq`.
 * Returns a reference to the live container inside `pq`, not a copy. */
template <typename T, typename S, typename C> S &Container(priority_queue<T, S, C> &pq) {
// Local helper type whose only purpose is to gain access to the protected member `c`
struct HackedQueue : private priority_queue<T, S, C> {
static S &Container(priority_queue<T, S, C> &pq) {
// &HackedQueue::c is a pointer to the base-class member `c`; apply it to pq
return pq.*&HackedQueue::c;
}
};
return HackedQueue::Container(pq);
}

/* Print a Heap (PriorityQueue) */
template <typename T, typename S, typename C> void printHeap(priority_queue<T, S, C> &heap) {
vector<T> vec = Container(heap);
Expand Down
39 changes: 39 additions & 0 deletions codes/java/chapter_heap/top_k.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* File: top_k.java
* Created Time: 2023-06-12
* Author: Krahets ([email protected])
*/

package chapter_heap;

import utils.*;
import java.util.*;

public class top_k {
    /**
     * Find the k largest elements of an array using a min-heap.
     *
     * The heap holds at most k candidates and its head is the smallest of them,
     * so a new element only needs to be compared against the head.
     *
     * @param nums input array; it is only read, never modified
     * @param k    number of largest elements requested; values greater than
     *             nums.length are clamped to nums.length, values <= 0 yield
     *             an empty heap
     * @return a min-heap holding the min(k, nums.length) largest elements
     */
    static Queue<Integer> topKHeap(int[] nums, int k) {
        Queue<Integer> heap = new PriorityQueue<Integer>();
        // Clamp k to the array length so the initial fill never indexes past the end
        int size = Math.min(k, nums.length);
        // Nothing to select; returning here also avoids unboxing the null that
        // peek() returns for an empty heap
        if (size <= 0) {
            return heap;
        }
        // Push the first `size` elements into the heap
        for (int i = 0; i < size; i++) {
            heap.add(nums[i]);
        }
        // From the next element onward, keep the heap size fixed at `size`
        for (int i = size; i < nums.length; i++) {
            // If the current element beats the smallest candidate, replace that candidate
            if (nums[i] > heap.peek()) {
                heap.poll();
                heap.add(nums[i]);
            }
        }
        return heap;
    }

    /* Driver code: select the 3 largest elements of a sample array and print the heap */
    public static void main(String[] args) {
        int[] nums = { 1, 7, 6, 3, 2 };
        int k = 3;

        Queue<Integer> res = topKHeap(nums, k);
        System.out.println("最大的 " + k + " 个元素为");
        PrintUtil.printHeap(res);
    }
}
2 changes: 1 addition & 1 deletion codes/python/chapter_heap/my_heap.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class MaxHeap:
"""大顶堆"""

def __init__(self, nums: list[int]):
"""构造方法"""
"""构造方法,根据输入列表建堆"""
# 将列表元素原封不动添加进堆
self.max_heap = nums
# 堆化除叶节点以外的其他所有节点
Expand Down
37 changes: 37 additions & 0 deletions codes/python/chapter_heap/top_k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
File: top_k.py
Created Time: 2023-06-10
Author: Krahets ([email protected])
"""

import sys, os.path as osp

sys.path.append(osp.dirname(osp.dirname(osp.abspath(__file__))))
from modules import *

import heapq


def top_k_heap(nums: list[int], k: int) -> list[int]:
    """Find the k largest elements of a list using a min-heap.

    The heap holds at most k candidates and heap[0] is the smallest of them,
    so a new element only needs to be compared against heap[0].

    Args:
        nums: Input list; it is only read, never modified.
        k: Number of largest elements requested. Values greater than
            len(nums) are clamped to len(nums); values <= 0 yield an
            empty list.

    Returns:
        A list arranged as a min-heap (heapq layout) holding the
        min(k, len(nums)) largest elements.
    """
    heap = []
    # Clamp k into [0, len(nums)] so neither nums[i] nor heap[0] can go out of range
    size = max(0, min(k, len(nums)))
    if size == 0:
        return heap
    # Push the first `size` elements into the heap
    for i in range(size):
        heapq.heappush(heap, nums[i])
    # From the next element onward, keep the heap size fixed at `size`
    for i in range(size, len(nums)):
        # If the current element beats the smallest candidate, replace that candidate
        if nums[i] > heap[0]:
            heapq.heappop(heap)
            heapq.heappush(heap, nums[i])
    return heap


"""Driver Code"""
if __name__ == "__main__":
    # Sample input and the number of largest elements to extract
    nums, k = [1, 7, 6, 3, 2], 3

    # Select the k largest elements, then print the resulting heap
    res = top_k_heap(nums, k)
    print(f"最大的 {k} 个元素为")
    print_heap(res)
2 changes: 1 addition & 1 deletion docs/chapter_heap/build_heap.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 建堆操作 *
# 建堆操作

如果我们想要根据输入列表生成一个堆,这个过程被称为「建堆」。

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/chapter_heap/top_k.assets/top_k_sorting.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
133 changes: 133 additions & 0 deletions docs/chapter_heap/top_k.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Top-K 问题

!!! question

给定一个长度为 $n$ 的无序数组 `nums` ,请返回数组中前 $k$ 大的元素。

对于该问题,我们先介绍两种思路比较直接的解法,再介绍效率更高的堆解法。

## 方法一:遍历选择

我们可以进行 $k$ 轮遍历,分别在每轮中提取第 $1$ , $2$ , $\cdots$ , $k$ 大的元素,时间复杂度为 $O(nk)$ 。

该方法只适用于 $k \ll n$ 的情况,因为当 $k$ 与 $n$ 比较接近时,其时间复杂度趋向于 $O(n^2)$ ,非常耗时。

![遍历寻找最大的 $k$ 个元素](top_k.assets/top_k_traversal.png)

!!! tip

当 $k = n$ 时,我们可以得到从大到小的序列,等价于「选择排序」算法。

## 方法二:排序

我们可以对数组 `nums` 进行排序,并返回最右边的 $k$ 个元素,时间复杂度为 $O(n \log n)$ 。

显然,该方法“超额”完成任务了,因为我们只需要找出最大的 $k$ 个元素即可,而不需要排序其他元素。

![排序寻找最大的 $k$ 个元素](top_k.assets/top_k_sorting.png)

## 方法三:堆

我们可以基于堆更加高效地解决 Top-K 问题,流程如下:

1. 初始化一个小顶堆,其堆顶元素最小;
2. 先将数组的前 $k$ 个元素依次入堆;
3. 从第 $k + 1$ 个元素开始,若当前元素大于堆顶元素,则将堆顶元素出堆,并将当前元素入堆;
4. 遍历完成后,堆中保存的就是最大的 $k$ 个元素。

=== "<1>"
![基于堆寻找最大的 $k$ 个元素](top_k.assets/top_k_heap_step1.png)

=== "<2>"
![top_k_heap_step2](top_k.assets/top_k_heap_step2.png)

=== "<3>"
![top_k_heap_step3](top_k.assets/top_k_heap_step3.png)

=== "<4>"
![top_k_heap_step4](top_k.assets/top_k_heap_step4.png)

=== "<5>"
![top_k_heap_step5](top_k.assets/top_k_heap_step5.png)

=== "<6>"
![top_k_heap_step6](top_k.assets/top_k_heap_step6.png)

=== "<7>"
![top_k_heap_step7](top_k.assets/top_k_heap_step7.png)

=== "<8>"
![top_k_heap_step8](top_k.assets/top_k_heap_step8.png)

=== "<9>"
![top_k_heap_step9](top_k.assets/top_k_heap_step9.png)

总共执行了 $n$ 轮入堆和出堆,堆的最大长度为 $k$ ,因此时间复杂度为 $O(n \log k)$ 。该方法的效率很高,当 $k$ 较小时,时间复杂度趋向 $O(n)$ ;当 $k$ 较大时,时间复杂度不会超过 $O(n \log n)$ 。

另外,该方法适用于动态数据流的使用场景。在不断加入数据时,我们可以持续维护堆内的元素,从而实现最大 $k$ 个元素的动态更新。

=== "Java"

```java title="top_k.java"
[class]{top_k}-[func]{topKHeap}
```

=== "C++"

```cpp title="top_k.cpp"
[class]{}-[func]{topKHeap}
```

=== "Python"

```python title="top_k.py"
[class]{}-[func]{top_k_heap}
```

=== "Go"

```go title="top_k.go"
[class]{maxHeap}-[func]{topKHeap}
```

=== "JavaScript"

```javascript title="top_k.js"
[class]{}-[func]{topKHeap}
```

=== "TypeScript"

```typescript title="top_k.ts"
[class]{}-[func]{topKHeap}
```

=== "C"

```c title="top_k.c"
[class]{maxHeap}-[func]{topKHeap}
```

=== "C#"

```csharp title="top_k.cs"
[class]{top_k}-[func]{topKHeap}
```

=== "Swift"

```swift title="top_k.swift"
[class]{}-[func]{topKHeap}
```

=== "Zig"

```zig title="top_k.zig"
[class]{}-[func]{topKHeap}
```

=== "Dart"

```dart title="top_k.dart"
[class]{}-[func]{top_k_heap}
```
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ hide:

<h3 align="left"> 作者简介 </h3>

靳宇栋 (Krahets),大厂高级算法工程师,上海交通大学硕士。力扣(LeetCode)全网阅读量最高博主,其 LeetBook《图解算法数据结构》已被订阅 22 万本。
靳宇栋 (Krahets),大厂高级算法工程师,上海交通大学硕士。力扣(LeetCode)全网阅读量最高博主,其 LeetBook《图解算法数据结构》已被订阅 24 万本。

---

Expand Down
5 changes: 3 additions & 2 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,9 @@ nav:
- 8. &nbsp; &nbsp; 堆:
- chapter_heap/index.md
- 8.1. &nbsp; 堆: chapter_heap/heap.md
- 8.2. &nbsp; 建堆操作 *: chapter_heap/build_heap.md
- 8.3. &nbsp; 小结: chapter_heap/summary.md
- 8.2. &nbsp; 建堆操作: chapter_heap/build_heap.md
- 8.3. &nbsp; Top-K 问题: chapter_heap/top_k.md
- 8.4. &nbsp; 小结: chapter_heap/summary.md
- 9. &nbsp; &nbsp; 图:
- chapter_graph/index.md
- 9.1. &nbsp; 图: chapter_graph/graph.md
Expand Down

0 comments on commit a111b94

Please sign in to comment.