forked from kovidgoyal/calibre
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ambito.recipe
69 lines (61 loc) · 2.41 KB
/
ambito.recipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
# -*- mode: python -*-
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2008-2021, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com
'''
from calibre.web.feeds.news import BasicNewsRecipe, classes
class Ambito(BasicNewsRecipe):
    """Recipe that builds an e-book from the RSS feeds of ambito.com.

    The recipe is purely declarative except for ``preprocess_html``, which
    repairs image sources before the article markup is processed further.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Ambito.com'
    __author__ = 'Darko Miletic'
    description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
    publisher = 'Editorial Nefir S.A.'
    category = 'news, politics, economy, finances, Argentina'
    language = 'es_AR'
    publication_type = 'newsportal'
    masthead_url = 'https://www.ambito.com/css-custom/239/images/logo-239-2020v2.svg'

    # --- Download behaviour ---------------------------------------------
    # Maximum article age; per the calibre recipe API this is measured in days.
    oldest_article = 1.2
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    compress_news_images = True
    scale_news_images_to_device = True
    # The same story can be listed in several feeds; de-duplicate by URL.
    ignore_duplicate_articles = {'url'}

    extra_css = """
        body{font-family: Roboto, sans-serif}
    """

    # Reuse the metadata above so the generated book carries the same values.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Content selection ----------------------------------------------
    keep_only_tags = [
        # Headline area: lead media, date, title, author and summary.
        classes(
            'detail-highlighted-multimedia news-headline__publication-date news-headline__title'
            ' news-headline__author-wrapper news-headline__article-summary'
        ),
        # Article text: an <article> element whose class list contains
        # 'article-body'. The `x and` guard skips tags without a class.
        dict(name='article', attrs={'class': lambda x: x and 'article-body' in x.split()}),
    ]
    remove_tags = [
        # Each tag name is listed once (the original repeated 'link').
        dict(name=['object', 'link', 'embed', 'iframe', 'meta']),
    ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Portada', 'https://www.ambito.com/rss/home.xml'),
        ('Economia', 'https://www.ambito.com/rss/economia.xml'),
        ('Finanzas', 'https://www.ambito.com/rss/finanzas.xml'),
        ('Politica', 'https://www.ambito.com/rss/politica.xml'),
        ('Opinion', 'https://www.ambito.com/rss/opinion.xml'),
        ('Informacion General', 'https://www.ambito.com/rss/informacion-general.xml'),
        ('Mundo', 'https://www.ambito.com/rss/mundo.xml'),
        ('Deportes', 'https://www.ambito.com/rss/deportes.xml'),
        ('Espectaculos', 'https://www.ambito.com/rss/espectaculos.xml'),
        ('Nacional', 'https://www.ambito.com/rss/nacional.xml'),
    ]

    def preprocess_html(self, soup):
        """Point every image at the URL stored in ``data-td-src-property``.

        Each ``<img>`` that carries a ``data-td-src-property`` attribute has
        that attribute's value copied into ``src``. Presumably the site
        lazy-loads images and keeps the real URL in the data attribute;
        verify against the live markup if images go missing.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img', attrs={'data-td-src-property': True}):
            img['src'] = img['data-td-src-property']
        return soup