from bs4 import BeautifulSoup
import requests


class NewsCNN:
"""
Create an instance of `NewsCNN` class.\n
```python
news = NewsCNN()
```
| Methods | Details |
| ---------------------------- | -------------------------------------------------------------------------- |
    | `.news_by_location(country="india")` | Returns the list of articles by a specific country.                        |
| `.news_by_category(type)` | Returns the list of articles by a specific category. |
"""

    def __init__(self):
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36"
        }

    def news_by_location(self, country: str):
        """
        Returns the relevant news articles for a particular country.\n
        Class - `NewsCNN`
        Parameters: \n
        - country: Name of the country\n
        ```python
        news = NewsCNN()
        news.news_by_location("india")
        ```
        """
        try:
            sol = []
            obj_keys = ["news", "link"]
            location = country.lower()
            URL = f"https://edition.cnn.com/world/{location}"
            # Send the request with a browser-like User-Agent so CNN serves the full page.
            page = requests.get(URL, headers=self.headers)
            parse = BeautifulSoup(page.content, "html.parser")
            # Headlines are rendered as <span data-editable="headline"> elements.
            heads = parse.find_all("span", attrs={"data-editable": "headline"})
            # Article links appear under several different container layouts on the page.
            links1 = parse.find_all(
                "a",
                attrs={
                    "class": "container__link container_lead-plus-headlines-with-images__link"
                },
            )
            links2 = parse.find_all(
                "a", attrs={"class": "container__link container_vertical-strip__link"}
            )
            links3 = parse.find_all(
                "a",
                attrs={"class": "container__link container_lead-plus-headlines__link"},
            )
            base = "https://edition.cnn.com"
            allheads = [head.text for head in heads]
            allurls = []
            for link in links1 + links2 + links3:
                full_url = base + link["href"]
                # Deduplicate while preserving order so headlines and links stay aligned.
                if full_url not in allurls:
                    allurls.append(full_url)
            # Pair each headline with a link; stop at the shorter list to avoid an IndexError.
            for i in range(min(len(allheads), len(allurls))):
                sol.append(dict(zip(obj_keys, [allheads[i], allurls[i]])))
            return sol
        except Exception:
            return None

    def news_by_category(self, type: str):
"""
Returns a list of news articles from a specific category.
Parameters:
- type (str): The category of news articles to retrieve. Allowable types are: "politics", "business", "opinions", "health", "style".
Returns:
A list of dictionaries, each containing news article information including title and link, or an exception if an error occurs.
Example:
```python
news = NewsCNN()
politics_articles = news.news_by_category("politics")
```
"""
        try:
            sol = []
            type = type.lower()
            url = f"https://edition.cnn.com/{type}"
            page = requests.get(url, headers=self.headers)
            parse = BeautifulSoup(page.content, "html.parser")
            # Each article card is an <a> wrapper; the headline text sits in a nested span.
            articles = parse.find_all(
                "a", {"class": "container__link container_lead-plus-headlines__link"}
            )
            for article in articles:
                text = article.find("span", {"data-editable": "headline"})
                if text:
                    link = "https://edition.cnn.com" + article["href"]
                    data = {"Title": text.text, "Link": link}
                    sol.append(data)
            return sol
        except Exception as e:
            return e
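

# A minimal usage sketch, not part of the original module: it assumes the file is run
# directly, that the CNN page layout still matches the selectors above, and that network
# access is available. news_by_location returns dicts keyed "news"/"link" (or None on
# failure); news_by_category returns dicts keyed "Title"/"Link" (or an exception object).
if __name__ == "__main__":
    news = NewsCNN()

    # Headlines and links from a country page, e.g. edition.cnn.com/world/india.
    india_articles = news.news_by_location("india")
    if india_articles:
        for item in india_articles[:5]:
            print(item["news"], "->", item["link"])

    # Headlines and links from a category page, e.g. edition.cnn.com/politics.
    politics_articles = news.news_by_category("politics")
    if isinstance(politics_articles, list):
        for item in politics_articles[:5]:
            print(item["Title"], "->", item["Link"])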