add top k problem

billryan · billryan · commit 5a0bca85441b · 2018-08-10T00:15:55.000+08:00
diff --git a/zh-hans/SUMMARY.md b/zh-hans/SUMMARY.md
@@ -221,6 +221,10 @@
 * [Big Data](bigdata/README.md)
    * [Top K Frequent Words (Map Reduce)](bigdata/top_k_frequent_words_map_reduce.md)
    * [Top K Frequent Words](bigdata/top_k_frequent_words.md)
+   * [Top K Frequent Words II](bigdata/top_k_frequent_words_ii.md)
+   * [K Closest Points](bigdata/k_closest_points.md)
+   * [Top k Largest Numbers](bigdata/top_k_largest_numbers.md)
+   * [Top k Largest Numbers II](bigdata/top_k_largest_numbers_ii.md)
 * [Problem Misc](problem_misc/README.md)
    * [Nuts and Bolts Problem](problem_misc/nuts_and_bolts_problem.md)
    * [String to Integer](problem_misc/string_to_integer.md)
diff --git a/zh-hans/bigdata/k_closest_points.md b/zh-hans/bigdata/k_closest_points.md
@@ -0,0 +1,114 @@
+---
+difficulty: Medium
+tags:
+- Heap
+- Amazon
+- LinkedIn
+title: K Closest Points
+---
+
+# K Closest Points
+
+## Problem
+
+### Metadata
+
+- tags: Heap, Amazon, LinkedIn
+- difficulty: Medium
+- source(lintcode): <https://www.lintcode.com/problem/k-closest-points/>
+
+### Description
+
+Given some `points` and a point `origin` in two-dimensional space, find the `k` points that are nearest to `origin`.
+Return these points sorted by distance. If two points are at the same distance, sort them by x-coordinate; if their x-coordinates are also equal, sort them by y-coordinate.
+
+#### Example
+
+Given points = `[[4,6],[4,7],[4,4],[2,5],[1,1]]`, origin = `[0, 0]`, k = `3`
+return `[[1,1],[2,5],[4,4]]`
+
+## 题解
+
+和普通的字符串及数目的比较不同，此题为距离的比较。
+
+### Java
+
+```java
+/**
+ * Definition for a point.
+ * class Point {
+ *     int x;
+ *     int y;
+ *     Point() { x = 0; y = 0; }
+ *     Point(int a, int b) { x = a; y = b; }
+ * }
+ */
+
+public class Solution {
+    /**
+     * Finds the k points nearest to origin using a bounded heap.
+     *
+     * The heap is ordered farthest-first (see DistanceComparator), so its head
+     * is always the worst-ranked point kept so far and is the one evicted when
+     * the heap grows beyond k entries.
+     *
+     * @param points: a list of points
+     * @param origin: a point
+     * @param k: An integer
+     * @return: the k closest points, nearest first; ties on distance are
+     *          ordered by x, then by y. An empty array is returned when
+     *          points is null or k is not positive.
+     */
+    public Point[] kClosest(Point[] points, Point origin, int k) {
+        if (points == null || k <= 0) {
+            return new Point[0];
+        }
+
+        Queue<Point> heap = new PriorityQueue<Point>(new DistanceComparator(origin));
+        for (Point point : points) {
+            // Let the comparator decide who is evicted, so that ties on
+            // distance are resolved by x and y exactly as in the final order.
+            heap.offer(point);
+            if (heap.size() > k) {
+                heap.poll();
+            }
+        }
+
+        // The heap yields the farthest point first, so fill the array backwards.
+        Point[] kClosestPoints = new Point[heap.size()];
+        for (int i = kClosestPoints.length - 1; i >= 0; i--) {
+            kClosestPoints[i] = heap.poll();
+        }
+
+        return kClosestPoints;
+    }
+
+    /**
+     * Squared Euclidean distance between p and origin.
+     *
+     * The result is computed in int arithmetic, so it can overflow when the
+     * coordinate differences are large.
+     *
+     * @param p: a point
+     * @param origin: the reference point
+     * @return: (dx * dx) + (dy * dy)
+     */
+    public int distance(Point p, Point origin) {
+        int dx = p.x - origin.x;
+        int dy = p.y - origin.y;
+        return dx * dx + dy * dy;
+    }
+
+    /**
+     * Orders points in reverse of the required output order: larger distance
+     * first, then larger x, then larger y.
+     */
+    class DistanceComparator implements Comparator<Point> {
+        private Point origin = null;
+        public DistanceComparator(Point origin) {
+            this.origin = origin;
+        }
+
+        @Override
+        public int compare(Point p1, Point p2) {
+            int d1 = distance(p1, origin);
+            int d2 = distance(p2, origin);
+            if (d1 != d2) {
+                return Integer.compare(d2, d1);
+            }
+            if (p1.x != p2.x) {
+                return Integer.compare(p2.x, p1.x);
+            }
+            return Integer.compare(p2.y, p1.y);
+        }
+    }
+}
+```
+
+### 源码分析
+
+注意 Comparator 的用法和大小根堆的选择即可。
+
+### 复杂度分析
+
+堆中最多保存 k 个元素，每次插入或删除的代价为 $$O(\log k)$$，共处理 n 个点，故时间复杂度为 $$O(n \log k)$$, 空间复杂度为 $$O(k)$$.
diff --git a/zh-hans/bigdata/top_k_frequent_words_ii.md b/zh-hans/bigdata/top_k_frequent_words_ii.md
@@ -0,0 +1,120 @@
+---
+difficulty: Hard
+tags:
+- Heap
+- Data Structure Design
+- Hash Table
+title: Top K Frequent Words II
+---
+
+# Top K Frequent Words II
+
+## Problem
+
+### Metadata
+
+- tags: Heap, Data Structure Design, Hash Table
+- difficulty: Hard
+- source(lintcode): <https://www.lintcode.com/problem/top-k-frequent-words-ii/>
+
+### Description
+
+Find top *k* frequent words in realtime data stream.
+
+Implement three methods for the *TopK* class:
+
+1. `TopK(k)`. The constructor.
+2. `add(word)`. Add a new word.
+3. `topk()`. Get the current top *k* frequent words.
+
+#### Notice
+
+If two words have the same frequency, rank them by alphabet.
+
+#### Example
+
+```
+TopK(2)
+add("lint")
+add("code")
+add("code")
+topk()
+>> ["code", "lint"]
+```
+
+## 题解
+
+此题较难，实际上和 Redis 的有序集合类似，综合使用字典和排序集合可完美解决。
+
+### Java
+
+```java
+public class TopK {
+    private final int k;
+    // Occurrence count of every word ever added.
+    private final Map<String, Integer> wordFreq;
+    // At most k words, ordered by TopkComparator using the counts in wordFreq.
+    private final TreeSet<String> topkSet;
+
+    /**
+     * Ranks words by descending count; words with equal counts are ranked
+     * by their natural String order.
+     */
+    class TopkComparator implements Comparator<String> {
+        public int compare(String s1, String s2) {
+            int left = wordFreq.get(s1);
+            int right = wordFreq.get(s2);
+            return (left == right) ? s1.compareTo(s2) : Integer.compare(right, left);
+        }
+    }
+
+    /*
+     * @param k: An integer
+     */
+    public TopK(int k) {
+        // do initialization if necessary
+        this.k = k;
+        this.wordFreq = new HashMap<String, Integer>(k);
+        this.topkSet = new TreeSet<String>(new TopkComparator());
+    }
+
+    /*
+     * @param word: A string
+     * @return: nothing
+     */
+    public void add(String word) {
+        Integer seen = wordFreq.get(word);
+        if (seen == null) {
+            wordFreq.put(word, 1);
+        } else {
+            // The set is ordered by the counts in wordFreq, so the word has to
+            // leave the set before its count is changed.
+            topkSet.remove(word);
+            wordFreq.put(word, seen + 1);
+        }
+
+        topkSet.add(word);
+        // Keep only the k best-ranked words.
+        if (topkSet.size() > k) {
+            topkSet.pollLast();
+        }
+    }
+
+    /*
+     * @return: the current top k frequent words.
+     */
+    public List<String> topk() {
+        // Copying the sorted set keeps its iteration order.
+        return new ArrayList<String>(topkSet);
+    }
+}
+```
+
+### 源码分析
+
+略
+
+### 复杂度分析
+
+待续
diff --git a/zh-hans/bigdata/top_k_largest_numbers.md b/zh-hans/bigdata/top_k_largest_numbers.md
@@ -0,0 +1,65 @@
+---
+difficulty: Medium
+tags:
+- Priority Queue
+- Heap
+title: Top k Largest Numbers
+---
+
+# Top k Largest Numbers
+
+## Problem
+
+### Metadata
+
+- tags: Priority Queue, Heap
+- difficulty: Medium
+- source(lintcode): <https://www.lintcode.com/problem/top-k-largest-numbers/>
+
+### Description
+
+Given an integer array, find the top *k* largest numbers in it.
+
+#### Example
+
+Given `[3,10,1000,-99,4,100]` and *k* = `3`.
+Return `[1000, 100, 10]`.
+
+## 题解
+
+简单题，使用堆即可。
+
+### Java
+
+```java
+public class Solution {
+    /**
+     * Returns the k largest values of nums in descending order.
+     *
+     * A min-heap holding at most k values is kept while scanning; its head is
+     * the smallest value retained so far and is replaced whenever a larger
+     * value is seen.
+     *
+     * @param nums: an integer array
+     * @param k: An integer
+     * @return: the top k largest numbers in array, largest first. When k
+     *          exceeds nums.length every element is returned; when k is not
+     *          positive the result is empty. A null nums is returned as is.
+     */
+    public int[] topk(int[] nums, int k) {
+        if (nums == null) {
+            return nums;
+        }
+
+        // Clamp k so that neither the array allocation nor poll() can fail.
+        int size = Math.max(0, Math.min(k, nums.length));
+        int[] maxK = new int[size];
+        if (size == 0) {
+            return maxK;
+        }
+
+        PriorityQueue<Integer> pq = new PriorityQueue<Integer>(size);
+        for (int num : nums) {
+            if (pq.size() < size) {
+                pq.offer(num);
+            } else if (num > pq.peek()) {
+                pq.poll();
+                pq.offer(num);
+            }
+        }
+
+        // The heap yields the smallest retained value first, so fill backwards.
+        for (int i = size - 1; i >= 0; i--) {
+            maxK[i] = pq.poll();
+        }
+
+        return maxK;
+    }
+}
+```
+
+### 源码分析
+
+略
+
+### 复杂度分析
+
+略
diff --git a/zh-hans/bigdata/top_k_largest_numbers_ii.md b/zh-hans/bigdata/top_k_largest_numbers_ii.md