Skip to content

Commit f2de819

Browse files
committed
add lintcode parser
1 parent b6b8e6f commit f2de819

File tree

2 files changed

+86
-4
lines changed

2 files changed

+86
-4
lines changed

scripts/lintcode.py

Lines changed: 78 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
5+
from pyquery import PyQuery as pq
6+
7+
8+
class Lintcode(object):
    """Scrape the details of a single LintCode problem page with PyQuery.

    Typical use is ``Lintcode().get_problem_all(url)``. The individual
    ``get_*`` helpers read ``self.driver`` and therefore require
    ``open_url`` to have been called first.
    """

    def __init__(self):
        # PyQuery document of the most recently opened page; None until
        # open_url() has been called.
        self.driver = None
        # URL passed to the most recent open_url() call.
        self.url = None

    def open_url(self, url):
        """Fetch ``url`` and keep the parsed document in ``self.driver``."""
        self.url = url
        print('open URL: {}'.format(url))
        self.driver = pq(url=url)

    def get_title(self):
        """Return the text of the page's ``<title>`` element."""
        print('get title...')
        return self.driver('title').text()

    def get_description(self):
        """Return the problem description HTML followed by the example HTML.

        Both fragments are looked up inside the ``#description`` element.
        A fragment that is not found contributes an empty string, so the
        result is always a string.
        """
        print('get description...')
        desc_pq = self.driver('#description')
        desc_html = desc_pq('.m-t-lg:nth-child(1)').html()
        example_html = desc_pq('.m-t-lg:nth-child(2)').html()
        # PyQuery's html() returns None when the selector matches nothing;
        # concatenating None would raise TypeError.
        return (desc_html or '') + (example_html or '')

    def get_difficulty(self):
        """Return the difficulty label read from the progress bar tooltip.

        The label is the second whitespace-separated word of the
        ``data-original-title`` attribute of the ``.progress-bar`` element
        (presumably of the form "Difficulty <level> ..." -- verify against
        the live page). Returns an empty string when the attribute is
        missing or has fewer than two words.
        """
        print('get difficulty...')
        original_title = self.driver('.progress-bar').attr(
            'data-original-title')
        if not original_title:
            return ''
        # split() without an argument tolerates repeated/leading/trailing
        # whitespace, unlike split(' ').
        words = original_title.split()
        if len(words) < 2:
            return ''
        return words[1]

    def get_tags(self):
        """Return the text of every link inside the ``#tags.tags`` element."""
        print('get tags...')
        return [link.text for link in self.driver('#tags.tags a')]

    def _get_related(self):
        """Return the PyQuery selection for the last ``.m-t-lg`` element."""
        print('get related...')
        return self.driver('.m-t-lg:last')

    def _clean_url(self, url):
        """Return the canonical English problem URL for ``url``.

        The slug is the fourth non-empty ``/``-separated component after
        the scheme (host, language, ``problem``, slug). Any query string or
        fragment is ignored.

        Raises:
            IndexError: if ``url`` has fewer than four such components.
        """
        remainder = url
        # A query string or fragment is not part of the slug.
        for separator in ('?', '#'):
            remainder = remainder.split(separator, 1)[0]
        # Drop the scheme ("http://", "https://") when present.
        remainder = remainder.split('://', 1)[-1]
        segments = [part for part in remainder.split('/') if part]
        problem_slug = segments[3]
        # 'http:/' joined with '/' yields the 'http://' prefix.
        return '/'.join(
            ['http:/', 'www.lintcode.com', 'en/problem', problem_slug])

    def get_problem_all(self, url):
        """Fetch all details of the problem at ``url``.

        Opens the page, then returns a dict with the keys ``title``,
        ``difficulty``, ``tags``, ``description`` and ``url`` (the
        canonicalised problem URL).
        """
        print('get all the problem detail...')
        self.open_url(url)
        title = self.get_title()
        difficulty = self.get_difficulty()
        tags = self.get_tags()
        description = self.get_description()
        return {
            'title': title,
            'difficulty': difficulty,
            'tags': tags,
            'description': description,
            'url': self._clean_url(url),
        }
73+
74+
75+
if __name__ == '__main__':
    # Manual smoke run: scrape one known problem page and print the result.
    sample_url = 'http://www.lintcode.com/en/problem/palindrome-number/'
    parser = Lintcode()
    problem_detail = parser.get_problem_all(sample_url)
    print(problem_detail)

scripts/main.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
from util import par_dir, mkdir_p
1212
from leetcode import Leetcode
13+
from lintcode import Lintcode
1314
from ojhtml2markdown import problem2md
1415

1516
BASEDIR = os.path.abspath(os.path.dirname(__file__))
@@ -38,11 +39,14 @@ def curr_time():
3839
raw_url = args.new
3940
problem_md = ''
4041
problem_slug = ''
42+
xxxcode = None
4143
if raw_url.startswith('https://leetcode'):
42-
leetcode = Leetcode()
43-
problem = leetcode.get_problem_all(raw_url)
44-
problem_slug = slugify(problem['title'], separator="_")
45-
problem_md = problem2md(problem)
44+
xxxcode = Leetcode()
45+
elif raw_url.startswith('http://www.lintcode.com'):
46+
xxxcode = Lintcode()
47+
problem = xxxcode.get_problem_all(raw_url)
48+
problem_slug = slugify(problem['title'], separator="_")
49+
problem_md = problem2md(problem)
4650

4751
if args.dir:
4852
post_dir = os.path.join(ROOTDIR, args.dir)

0 commit comments

Comments
 (0)