forked from krahets/hello-algo
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add the section of Top-K problem (krahets#551)
* Add the section of Top-K problem * Update my_heap.py * Update build_heap.md * Update my_heap.py
- Loading branch information
Showing
22 changed files
with
266 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,4 +151,6 @@ int main() { | |
/* 判断堆是否为空 */ | ||
bool isEmpty = maxHeap.empty(); | ||
cout << "\n堆是否为空 " << isEmpty << endl; | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/** | ||
* File: top_k.cpp | ||
* Created Time: 2023-06-12 | ||
* Author: Krahets ([email protected]) | ||
*/ | ||
|
||
#include "../utils/common.hpp" | ||
|
||
/**
 * Find the k largest elements of nums using a min-heap.
 *
 * The heap never holds more than k elements. Its top is the smallest of the
 * candidates kept so far, so any larger incoming value replaces the top.
 *
 * @param nums input values, in any order (not modified)
 * @param k    number of largest elements to keep; values <= 0 yield an empty
 *             heap, values above nums.size() are treated as nums.size()
 * @return     min-heap holding the min(k, nums.size()) largest elements
 */
priority_queue<int, vector<int>, greater<int>> topKHeap(vector<int> &nums, int k) {
    priority_queue<int, vector<int>, greater<int>> heap;
    // Nothing to select: avoid calling top() on an empty heap below
    if (k <= 0 || nums.empty()) {
        return heap;
    }
    // Clamp k so the seeding loop never indexes past the end of nums
    const size_t size = static_cast<size_t>(k) < nums.size() ? static_cast<size_t>(k) : nums.size();
    // Seed the heap with the first `size` elements
    for (size_t i = 0; i < size; i++) {
        heap.push(nums[i]);
    }
    // For the remaining elements, keep the heap size fixed
    for (size_t i = size; i < nums.size(); i++) {
        // A value larger than the current minimum evicts that minimum
        if (nums[i] > heap.top()) {
            heap.pop();
            heap.push(nums[i]);
        }
    }
    return heap;
}
|
||
// Driver Code | ||
int main() { | ||
vector<int> nums = {1, 7, 6, 3, 2}; | ||
int k = 3; | ||
|
||
priority_queue<int, vector<int>, greater<int>> res = topKHeap(nums, k); | ||
cout << "最大的 " << k << " 个元素为: "; | ||
printHeap(res); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/** | ||
* File: top_k.java | ||
* Created Time: 2023-06-12 | ||
* Author: Krahets ([email protected]) | ||
*/ | ||
|
||
package chapter_heap; | ||
|
||
import utils.*; | ||
import java.util.*; | ||
|
||
public class top_k { | ||
/* 基于堆查找数组中最大的 k 个元素 */ | ||
static Queue<Integer> topKHeap(int[] nums, int k) { | ||
Queue<Integer> heap = new PriorityQueue<Integer>(); | ||
// 将数组的前 k 个元素入堆 | ||
for (int i = 0; i < k; i++) { | ||
heap.add(nums[i]); | ||
} | ||
// 从第 k+1 个元素开始,保持堆的长度为 k | ||
for (int i = k; i < nums.length; i++) { | ||
// 若当前元素大于堆顶元素,则将堆顶元素出堆、当前元素入堆 | ||
if (nums[i] > heap.peek()) { | ||
heap.poll(); | ||
heap.add(nums[i]); | ||
} | ||
} | ||
return heap; | ||
} | ||
|
||
public static void main(String[] args) { | ||
int[] nums = { 1, 7, 6, 3, 2 }; | ||
int k = 3; | ||
|
||
Queue<Integer> res = topKHeap(nums, k); | ||
System.out.println("最大的 " + k + " 个元素为"); | ||
PrintUtil.printHeap(res); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
""" | ||
File: top_k.py | ||
Created Time: 2023-06-10 | ||
Author: Krahets ([email protected]) | ||
""" | ||
|
||
import sys, os.path as osp | ||
|
||
sys.path.append(osp.dirname(osp.dirname(osp.abspath(__file__)))) | ||
from modules import * | ||
|
||
import heapq | ||
|
||
|
||
def top_k_heap(nums: list[int], k: int) -> list[int]:
    """Find the k largest elements of nums using a min-heap.

    The heap never holds more than k elements. Its first item is the smallest
    of the candidates kept so far, so any larger incoming value replaces it.

    Args:
        nums: Input values, in any order (not modified).
        k: Number of largest elements to keep. Values below 1 yield an empty
            list; values above len(nums) are treated as len(nums).

    Returns:
        A list arranged as a heapq min-heap holding the min(k, len(nums))
        largest elements.
    """
    heap: list[int] = []
    # Clamp k so the seeding loop never indexes past the end of nums
    size = min(k, len(nums))
    # Nothing to select: avoid reading heap[0] from an empty heap below
    if size <= 0:
        return heap
    # Seed the heap with the first `size` elements
    for i in range(size):
        heapq.heappush(heap, nums[i])
    # For the remaining elements, keep the heap size fixed
    for i in range(size, len(nums)):
        # A value larger than the current minimum evicts that minimum
        if nums[i] > heap[0]:
            heapq.heappop(heap)
            heapq.heappush(heap, nums[i])
    return heap
|
||
|
||
"""Driver Code""" | ||
if __name__ == "__main__":
    # Sample input: keep the k largest values of nums
    k = 3
    nums = [1, 7, 6, 3, 2]

    # The returned list is a min-heap holding the k largest values
    res = top_k_heap(nums, k)
    print(f"最大的 {k} 个元素为")
    print_heap(res)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# 建堆操作 * | ||
# 建堆操作 | ||
|
||
如果我们想要根据输入列表生成一个堆,这个过程被称为「建堆」。 | ||
|
||
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Top-K 问题 | ||
|
||
!!! question | ||
|
||
给定一个长度为 $n$ 的无序数组 `nums` ,请返回数组中前 $k$ 大的元素。 | ||
|
||
对于该问题,我们先介绍两种思路比较直接的解法,再介绍效率更高的堆解法。 | ||
|
||
## 方法一:遍历选择 | ||
|
||
我们可以进行 $k$ 轮遍历,分别在每轮中提取第 $1$ , $2$ , $\cdots$ , $k$ 大的元素,时间复杂度为 $O(nk)$ 。 | ||
|
||
该方法只适用于 $k \ll n$ 的情况,因为当 $k$ 与 $n$ 比较接近时,其时间复杂度趋向于 $O(n^2)$ ,非常耗时。 | ||
|
||
![遍历寻找最大的 $k$ 个元素](top_k.assets/top_k_traversal.png) | ||
|
||
!!! tip | ||
|
||
当 $k = n$ 时,我们可以得到从大到小的序列,等价于「选择排序」算法。 | ||
|
||
## 方法二:排序 | ||
|
||
我们可以对数组 `nums` 进行排序,并返回最右边的 $k$ 个元素,时间复杂度为 $O(n \log n)$ 。 | ||
|
||
显然,该方法“超额”完成任务了,因为我们只需要找出最大的 $k$ 个元素即可,而不需要排序其他元素。 | ||
|
||
![排序寻找最大的 $k$ 个元素](top_k.assets/top_k_sorting.png) | ||
|
||
## 方法三:堆 | ||
|
||
我们可以基于堆更加高效地解决 Top-K 问题,流程如下: | ||
|
||
1. 初始化一个小顶堆,其堆顶元素最小; | ||
2. 先将数组的前 $k$ 个元素依次入堆; | ||
3. 从第 $k + 1$ 个元素开始,若当前元素大于堆顶元素,则将堆顶元素出堆,并将当前元素入堆; | ||
4. 遍历完成后,堆中保存的就是最大的 $k$ 个元素。 | ||
|
||
=== "<1>" | ||
![基于堆寻找最大的 $k$ 个元素](top_k.assets/top_k_heap_step1.png) | ||
|
||
=== "<2>" | ||
![top_k_heap_step2](top_k.assets/top_k_heap_step2.png) | ||
|
||
=== "<3>" | ||
![top_k_heap_step3](top_k.assets/top_k_heap_step3.png) | ||
|
||
=== "<4>" | ||
![top_k_heap_step4](top_k.assets/top_k_heap_step4.png) | ||
|
||
=== "<5>" | ||
![top_k_heap_step5](top_k.assets/top_k_heap_step5.png) | ||
|
||
=== "<6>" | ||
![top_k_heap_step6](top_k.assets/top_k_heap_step6.png) | ||
|
||
=== "<7>" | ||
![top_k_heap_step7](top_k.assets/top_k_heap_step7.png) | ||
|
||
=== "<8>" | ||
![top_k_heap_step8](top_k.assets/top_k_heap_step8.png) | ||
|
||
=== "<9>" | ||
![top_k_heap_step9](top_k.assets/top_k_heap_step9.png) | ||
|
||
总共执行了 $n$ 轮入堆和出堆,堆的最大长度为 $k$ ,因此时间复杂度为 $O(n \log k)$ 。该方法的效率很高,当 $k$ 较小时,时间复杂度趋向 $O(n)$ ;当 $k$ 较大时,时间复杂度不会超过 $O(n \log n)$ 。 | ||
|
||
另外,该方法适用于动态数据流的使用场景。在不断加入数据时,我们可以持续维护堆内的元素,从而实现最大 $k$ 个元素的动态更新。 | ||
|
||
=== "Java" | ||
|
||
```java title="top_k.java" | ||
[class]{top_k}-[func]{topKHeap} | ||
``` | ||
|
||
=== "C++" | ||
|
||
```cpp title="top_k.cpp" | ||
[class]{}-[func]{topKHeap} | ||
``` | ||
|
||
=== "Python" | ||
|
||
```python title="top_k.py" | ||
[class]{}-[func]{top_k_heap} | ||
``` | ||
|
||
=== "Go" | ||
|
||
```go title="top_k.go" | ||
[class]{maxHeap}-[func]{topKHeap} | ||
``` | ||
|
||
=== "JavaScript" | ||
|
||
```javascript title="top_k.js" | ||
[class]{}-[func]{topKHeap} | ||
``` | ||
|
||
=== "TypeScript" | ||
|
||
```typescript title="top_k.ts" | ||
[class]{}-[func]{topKHeap} | ||
``` | ||
|
||
=== "C" | ||
|
||
```c title="top_k.c" | ||
[class]{maxHeap}-[func]{topKHeap} | ||
``` | ||
|
||
=== "C#" | ||
|
||
```csharp title="top_k.cs" | ||
[class]{top_k}-[func]{topKHeap} | ||
``` | ||
|
||
=== "Swift" | ||
|
||
```swift title="top_k.swift" | ||
[class]{}-[func]{topKHeap} | ||
``` | ||
|
||
=== "Zig" | ||
|
||
```zig title="top_k.zig" | ||
[class]{}-[func]{topKHeap} | ||
``` | ||
|
||
=== "Dart" | ||
|
||
```dart title="top_k.dart" | ||
[class]{}-[func]{top_k_heap} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters