forked from kovidgoyal/calibre
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharabian_business.recipe
83 lines (74 loc) · 4.3 KB
/
arabian_business.recipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.arabianbusiness.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Arabian_Business(BasicNewsRecipe):
    """Recipe for the RSS feeds published by www.arabianbusiness.com.

    Article URLs are rewritten by ``print_version`` and the downloaded
    markup is cleaned up by ``preprocess_html``. The class attributes
    below are settings consumed by ``BasicNewsRecipe``; consult the
    calibre recipe API documentation for their exact semantics.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Arabian Business'
    __author__ = 'Darko Miletic'
    description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.'  # noqa
    publisher = 'Arabian Business Publishing Ltd.'
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.arabianbusiness.com/skins/ab.main/gfx/arabianbusiness_logo_sm.gif'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Styling ----------------------------------------------------------
    no_stylesheets = True
    # Written as adjacent literals; the resulting value starts and ends
    # with a newline and holds one CSS rule per line.
    extra_css = (
        '\n'
        'body{font-family: Georgia,serif }\n'
        'img{margin-bottom: 0.4em; margin-top: 0.4em; display:block}\n'
        '.byline,.dateline{font-size: small; display: inline; font-weight: bold}\n'
        'ul{list-style: none outside none;}\n'
    )

    # Reuses the metadata defined above, so it must come after it.
    conversion_options = {
        'comment': description,
        'publisher': publisher,
        'language': language,
    }

    # --- Markup clean-up rules ---------------------------------------------
    remove_tags_before = {'attrs': {'id': 'article-title'}}
    remove_tags = [
        {'name': ['meta', 'link', 'base', 'iframe', 'embed', 'object']},
        {'attrs': {'class': 'printfooter'}},
    ]
    remove_attributes = ['lang']

    # --- Feeds: (section title, RSS URL) pairs -------------------------------
    feeds = [
        # world/
        (u'Africa', u'http://www.arabianbusiness.com/world/Africa/?service=rss'),
        (u'Americas', u'http://www.arabianbusiness.com/world/americas/?service=rss'),
        (u'Asia Pacific', u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss'),
        (u'Europe', u'http://www.arabianbusiness.com/world/europe/?service=rss'),
        (u'Middle East', u'http://www.arabianbusiness.com/world/middle-east/?service=rss'),
        (u'South Asia', u'http://www.arabianbusiness.com/world/south-asia/?service=rss'),
        # industries/
        (u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss'),
        (u'Construction', u'http://www.arabianbusiness.com/industries/construction/?service=rss'),
        (u'Education', u'http://www.arabianbusiness.com/industries/education/?service=rss'),
        (u'Energy', u'http://www.arabianbusiness.com/industries/energy/?service=rss'),
        (u'Healthcare', u'http://www.arabianbusiness.com/industries/healthcare/?service=rss'),
        (u'Media', u'http://www.arabianbusiness.com/industries/media/?service=rss'),
        (u'Real Estate', u'http://www.arabianbusiness.com/industries/real-estate/?service=rss'),
        (u'Retail', u'http://www.arabianbusiness.com/industries/retail/?service=rss'),
        (u'Technology', u'http://www.arabianbusiness.com/industries/technology/?service=rss'),
        (u'Transport', u'http://www.arabianbusiness.com/industries/transport/?service=rss'),
        (u'Travel', u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss'),
        # markets/
        (u'Equities', u'http://www.arabianbusiness.com/markets/equities/?service=rss'),
        (u'Commodities', u'http://www.arabianbusiness.com/markets/commodities/?service=rss'),
        (u'Currencies', u'http://www.arabianbusiness.com/markets/currencies/?service=rss'),
        (u'Market Data', u'http://www.arabianbusiness.com/markets/market-data/?service=rss'),
        # opinion/
        (u'Comment', u'http://www.arabianbusiness.com/opinion/comment/?service=rss'),
        (u'Think Tank', u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss'),
        # lifestyle/
        (u'Arts', u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss'),
        (u'Cars', u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss'),
        (u'Food', u'http://www.arabianbusiness.com/lifestyle/food/?service=rss'),
        (u'Sport', u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss'),
    ]

    def print_version(self, url):
        """Return ``url`` with the printer-view query string appended.

        The suffix is appended verbatim; ``url`` is not inspected for an
        existing query string.
        """
        printer_suffix = '?service=printer&page='
        return url + printer_suffix

    def preprocess_html(self, soup):
        """Strip inline styles and flatten simple links; return ``soup``.

        The tree is modified in place:

        * the ``style`` attribute is deleted from every element that has one;
        * every ``<a>`` whose ``.string`` is not ``None`` is replaced by that
          string, dropping the link but keeping its text. Anchors whose
          ``.string`` is ``None`` are left untouched.
        """
        for styled in soup.findAll(style=True):
            del styled['style']

        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                # Nothing usable as a plain-text replacement; keep the tag.
                continue
            anchor.replaceWith(link_text)

        return soup