|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | + |
| 5 | +from pyquery import PyQuery as pq |
| 6 | + |
| 7 | + |
class Lintcode(object):
    """Scraper for a single LintCode problem page.

    A page must be loaded with ``open_url`` (``get_problem_all`` does this
    itself) before any ``get_*`` method is called; until then
    ``self.driver`` is ``None`` and cannot be queried.
    """

    def __init__(self):
        # PyQuery document of the most recently opened page.
        self.driver = None
        # URL of the most recently opened page; filled in by ``open_url``.
        self.url = None

    def open_url(self, url):
        """Load ``url`` into a PyQuery document and keep it on the instance.

        :param url: address of the problem page to load.
        """
        self.url = url
        print('open URL: {}'.format(url))
        self.driver = pq(url=url)

    def get_title(self):
        """Return the text of the loaded page's ``title`` element."""
        print('get title...')
        return self.driver('title').text()

    def get_description(self):
        """Return the description HTML followed by the example HTML.

        Both fragments are looked up inside ``#description``. A fragment
        whose lookup yields no HTML is skipped, so the result is an empty
        string when neither is present.

        :returns: the concatenated inner HTML of the two sections.
        """
        print('get description...')
        desc_pq = self.driver('#description')
        sections = (
            desc_pq('.m-t-lg:nth-child(1)').html(),
            desc_pq('.m-t-lg:nth-child(2)').html(),
        )
        # ``html()`` may give None when nothing matched; joining only the
        # truthy parts avoids a TypeError on ``None + str``.
        return ''.join(part for part in sections if part)

    def get_difficulty(self):
        """Return the difficulty word from the progress bar tooltip.

        The tooltip is the ``data-original-title`` attribute of
        ``.progress-bar``; the second whitespace-separated word is returned.
        NOTE(review): presumably the tooltip reads like
        ``Difficulty <level> ...`` -- confirm against a live page.

        :returns: the difficulty string, or ``None`` when the tooltip is
            missing or has fewer than two words.
        """
        print('get difficulty...')
        progress_bar = self.driver('.progress-bar')
        original_title = progress_bar.attr('data-original-title')
        if not original_title:
            return None
        # ``split()`` without arguments tolerates repeated or odd whitespace.
        words = original_title.split()
        if len(words) < 2:
            return None
        return words[1]

    def get_tags(self):
        """Return the text of every link matched by ``#tags.tags a``."""
        print('get tags...')
        return [link.text for link in self.driver('#tags.tags a')]

    def _get_related(self):
        """Return the selection matched by ``.m-t-lg:last``."""
        print('get related...')
        return self.driver('.m-t-lg:last')

    def _clean_url(self, url):
        """Build the canonical English problem URL for ``url``.

        The slug is the fourth ``/``-separated segment after the scheme
        prefix is cut off (host, language, ``problem``, slug). Any query
        string or fragment attached to the slug is dropped.

        :param url: a problem page URL.
        :returns: ``http://www.lintcode.com/en/problem/<slug>``.
        :raises IndexError: when ``url`` has fewer than four segments.
        """
        path = url[len('http://'):].strip('/')
        problem_slug = path.split('/')[3]
        # Keep only the slug itself, not ``?query`` or ``#fragment``.
        problem_slug = problem_slug.split('?')[0].split('#')[0]
        new_url = ['http:/', 'www.lintcode.com', 'en/problem', problem_slug]
        return '/'.join(new_url)

    def get_problem_all(self, url):
        """Fetch every detail of the problem at ``url``.

        :param url: address of the problem page.
        :returns: dict with keys ``title``, ``difficulty``, ``tags``,
            ``description`` and ``url`` (the cleaned, canonical URL).
        """
        print('get all the problem detail...')
        self.open_url(url)
        title = self.get_title()
        difficulty = self.get_difficulty()
        tags = self.get_tags()
        description = self.get_description()
        return {
            'title': title,
            'difficulty': difficulty,
            'tags': tags,
            'description': description,
            'url': self._clean_url(url),
        }
| 73 | + |
| 74 | + |
if __name__ == '__main__':
    # Demo run: scrape one sample problem page and print the resulting dict.
    sample_url = 'http://www.lintcode.com/en/problem/palindrome-number/'
    scraper = Lintcode()
    problem = scraper.get_problem_all(sample_url)
    print(problem)
0 commit comments