|
| 1 | +#!/usr/bin/python3 |
| 2 | +""" |
| 3 | +Median is the middle value in an ordered integer list. If the size of the list |
| 4 | +is even, there is no middle value. So the median is the mean of the two middle |
| 5 | +values. |
| 6 | +
|
| 7 | +Examples: |
| 8 | +[2,3,4] , the median is 3 |
| 9 | +
|
| 10 | +[2,3], the median is (2 + 3) / 2 = 2.5 |
| 11 | +
|
| 12 | +Given an array nums, there is a sliding window of size k which is moving from |
| 13 | +the very left of the array to the very right. You can only see the k numbers in |
| 14 | +the window. Each time the sliding window moves right by one position. Your job |
| 15 | +is to output the median array for each window in the original array. |
| 16 | +
|
| 17 | +For example, |
| 18 | +Given nums = [1,3,-1,-3,5,3,6,7], and k = 3. |
| 19 | +
|
| 20 | +Window position Median |
| 21 | +--------------- ----- |
| 22 | +[1 3 -1] -3 5 3 6 7 1 |
| 23 | + 1 [3 -1 -3] 5 3 6 7 -1 |
| 24 | + 1 3 [-1 -3 5] 3 6 7 -1 |
| 25 | + 1 3 -1 [-3 5 3] 6 7 3 |
| 26 | + 1 3 -1 -3 [5 3 6] 7 5 |
| 27 | + 1 3 -1 -3 5 [3 6 7] 6 |
| 28 | +Therefore, return the median sliding window as [1,-1,-1,3,5,6]. |
| 29 | +
|
| 30 | +Note: |
| 31 | +You may assume k is always valid, i.e. k is always smaller than the input array's |
| 32 | +size for non-empty array. |
| 33 | +""" |
| 34 | +from typing import List |
| 35 | +import heapq |
| 36 | + |
| 37 | + |
class DualHeap:
    """Max-heap / min-heap pair that tracks the median of a multiset.

    The lower half of the values lives in ``max_h`` and the upper half in
    ``min_h``. Deletion is lazy: ``pop`` only records the value as pending
    and adjusts the logical size; the physical entry is discarded once it
    reaches the top of its heap.
    """

    def __init__(self):
        """
        ---- number line --->
        --- max heap --- | --- min heap ---
        """
        self.max_h = []  # List[Tuple[comparator, num]], comparator is -num
        self.min_h = []  # List[Tuple[comparator, num]], comparator is num
        # Logical sizes: count only entries that are not pending deletion.
        self.max_sz = 0
        self.min_sz = 0
        # value -> number of pending lazy deletions. Keyed by value (not by
        # index in nums) because the heap side is decided by value alone.
        # A count is required: with a plain set, deleting the same value
        # twice before it surfaces would collapse into a single deletion
        # and leave a stale entry behind.
        self.to_remove = {}

    def insert(self, num):
        """Add ``num`` to the side it belongs to, then rebalance."""
        if self.max_h and num > self.max_h[0][1]:
            heapq.heappush(self.min_h, (num, num))
            self.min_sz += 1
        else:
            heapq.heappush(self.max_h, (-num, num))
            self.max_sz += 1
        self.balance()

    def pop(self, num):
        """Lazily delete one occurrence of ``num``, then rebalance.

        ``num`` is expected to be a value currently held by the structure.
        """
        self.to_remove[num] = self.to_remove.get(num, 0) + 1
        # Same side test as insert(): values above the max-heap top live in
        # the min heap, everything else is accounted to the max heap.
        if self.max_h and num > self.max_h[0][1]:
            self.min_sz -= 1
        else:
            self.max_sz -= 1
        self.balance()

    def _discard_pending(self, heap):
        """Pop entries off ``heap`` while its top is pending deletion."""
        pending = self.to_remove
        while heap and pending.get(heap[0][1], 0) > 0:
            _, num = heapq.heappop(heap)
            if pending[num] == 1:
                # Drop the key so membership tests on to_remove stay exact.
                del pending[num]
            else:
                pending[num] -= 1

    def clean_top(self):
        """Ensure neither heap has a pending-deletion entry at its top."""
        self._discard_pending(self.max_h)
        self._discard_pending(self.min_h)

    def balance(self):
        """Restore ``min_sz <= max_sz <= min_sz + 1`` and clean both tops."""
        # keep skew in max sz
        while self.max_sz < self.min_sz:
            self.clean_top()  # the entry being moved must be a live one
            _, num = heapq.heappop(self.min_h)
            heapq.heappush(self.max_h, (-num, num))
            self.min_sz -= 1
            self.max_sz += 1
        while self.max_sz > self.min_sz + 1:
            self.clean_top()
            _, num = heapq.heappop(self.max_h)
            heapq.heappush(self.min_h, (num, num))
            self.min_sz += 1
            self.max_sz -= 1

        self.clean_top()

    def get_median(self, k):
        """Return the median of the ``k`` live values.

        For odd ``k`` this is the top of the max heap (returned as stored);
        for even ``k`` it is the float mean of the two heap tops.
        """
        self.clean_top()
        if k % 2 == 1:
            return self.max_h[0][1]
        return 0.5 * (self.max_h[0][1] + self.min_h[0][1])
| 98 | + |
| 99 | + |
class Solution:
    def medianSlidingWindow(self, nums: List[int], k: int) -> List[float]:
        """
        Return the median of every length-k window of nums, left to right.

        Approaches:
        1. BST, proxied by bisect
        2. dual heap + lazy removal + balancing on the valid elements (used)

        --- max heap --- | --- min heap ---
        The element leaving the window on the left has to be deleted from
        whichever heap holds it.

        Lazy removal keyed by index (idx -> removed?) does not work: for a
        case such as [1,1,1,1], k=2 the heap to shrink cannot be determined
        from an index, because that decision depends only on the value.
        Calculating the median does not care about the index either, only
        about the value, so removal is tracked by value.

        Returns an empty list when no full window exists (empty nums,
        k <= 0, or k > len(nums)).
        """
        if k <= 0 or k > len(nums):
            return []

        window = DualHeap()
        for num in nums[:k]:
            window.insert(num)
        medians = [window.get_median(k)]

        # nums[k + j] enters the window exactly when nums[j] leaves it.
        for incoming, outgoing in zip(nums[k:], nums):
            window.insert(incoming)
            window.pop(outgoing)
            medians.append(window.get_median(k))

        return medians
| 130 | + |
| 131 | + |
if __name__ == "__main__":
    # Smoke test with 32-bit extremes: only checks that a non-empty result
    # comes back.
    int_extremes = [
        -2147483648, -2147483648, 2147483647, -2147483648, -2147483648,
        -2147483648, 2147483647, 2147483647, 2147483647, 2147483647,
        -2147483648, 2147483647, -2147483648,
    ]
    assert Solution().medianSlidingWindow(int_extremes, 2)

    # All-duplicate window, even k.
    all_ones = [1, 1, 1, 1]
    assert Solution().medianSlidingWindow(all_ones, 2) == [1, 1, 1]

    # Worked example from the module docstring, odd k.
    sample = [1, 3, -1, -3, 5, 3, 6, 7]
    assert Solution().medianSlidingWindow(sample, 3) == [1, -1, -1, 3, 5, 6]
0 commit comments