Skip to content

vibaskaran/Social-Analytics

Repository files navigation

News Mood

  • A review of the last 100 tweets by top news outlets shows that FOX tweets have a considerably higher compound sentiment score than the other outlets. (Results obtained using the VADER Sentiment Analyzer.)

  • CNN has the lowest compound sentiment score.

  • More tweets would need to be analyzed to draw more specific conclusions.

# Dependencies
import tweepy
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import random as r 
import seaborn as sn
from math import trunc

# Import and initialize the VADER sentiment analyzer.
# A single shared instance is reused for scoring every tweet below.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
# Twitter API Keys
import os

# Twitter API keys.
# SECURITY: the credentials previously hard-coded here were committed to a
# public repository and must be treated as compromised — rotate them.
# Read them from the environment instead of embedding them in source.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "")

# Setup Tweepy API authentication; the JSON parser makes each endpoint
# return plain dicts (hence tweet["text"] indexing below).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())
# Target User Accounts
# News-outlet Twitter handles to analyze.
# NOTE(review): '@New York Times' contains spaces and is not a valid
# Twitter handle (the real account is @nytimes) — confirm the intended
# target; API calls for it will not match a real timeline.
target_user = ('@BBC', '@CBS', '@CNN', '@Fox', '@New York Times')
#target_user = ('@CNN')

# Per-tweet accumulators, filled in parallel by the collection loop below.
(tweet_times, user_list, compound_list, positive_list,
 negative_list, neutral_list, time_list, my_index) = ([] for _ in range(8))

# Loop through each user
# Collect VADER sentiment scores for each outlet's recent tweets.
for user in target_user:
    idx = 0
    # Fetch 5 pages of the timeline (default 20 tweets/page = 100 tweets).
    # The original call omitted the `page` argument, so the same first
    # page was fetched five times, producing duplicated rows.
    for page in range(1, 6):
        public_tweets = api.user_timeline(user, page=page)

        # Score every tweet on this page.
        for tweet in public_tweets:

            # Run VADER once per tweet and reuse the score dict
            # (the original recomputed it four times per tweet).
            scores = analyzer.polarity_scores(tweet["text"])

            # Record one row per tweet across the parallel lists.
            user_list.append(user)
            compound_list.append(scores["compound"])
            positive_list.append(scores["pos"])
            negative_list.append(scores["neg"])
            neutral_list.append(scores["neu"])
            my_index.append(idx)  # position within this outlet ("tweets ago")
            idx += 1

# Adding all the tweets into an Array

# Assemble the parallel per-tweet lists into one DataFrame, indexed by
# each tweet's position within its outlet's timeline ("tweets ago").
news_data = pd.DataFrame({
    'Agency': user_list,
    'Compound': compound_list,
    'Positive': positive_list,
    'Neutral': neutral_list,
    'Negative': negative_list,
    'My_index': my_index,
})
# set_index returns a NEW frame; the original discarded the result, so
# the index was never actually applied. Rebind to keep it.
news_data = news_data.set_index('My_index')
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
Agency Compound Negative Neutral Positive
My_index
0 @BBC -0.4007 0.114 0.886 0.000
1 @BBC -0.5106 0.148 0.852 0.000
2 @BBC 0.0000 0.000 1.000 0.000
3 @BBC 0.0000 0.000 1.000 0.000
4 @BBC 0.3327 0.000 0.894 0.106
5 @BBC 0.5562 0.000 0.805 0.195
6 @BBC 0.0000 0.000 1.000 0.000
7 @BBC 0.5399 0.000 0.812 0.188
8 @BBC 0.0000 0.000 1.000 0.000
9 @BBC 0.4939 0.000 0.849 0.151
10 @BBC -0.3182 0.141 0.859 0.000
11 @BBC 0.3612 0.000 0.872 0.128
12 @BBC 0.4939 0.000 0.814 0.186
13 @BBC 0.4215 0.000 0.865 0.135
14 @BBC 0.0000 0.000 1.000 0.000
15 @BBC 0.4574 0.000 0.857 0.143
16 @BBC 0.0000 0.000 1.000 0.000
17 @BBC 0.7906 0.000 0.696 0.304
18 @BBC 0.0000 0.000 1.000 0.000
19 @BBC 0.6361 0.000 0.792 0.208
20 @BBC -0.4007 0.114 0.886 0.000
21 @BBC -0.5106 0.148 0.852 0.000
22 @BBC 0.0000 0.000 1.000 0.000
23 @BBC 0.0000 0.000 1.000 0.000
24 @BBC 0.3327 0.000 0.894 0.106
25 @BBC 0.5562 0.000 0.805 0.195
26 @BBC 0.0000 0.000 1.000 0.000
27 @BBC 0.5399 0.000 0.812 0.188
28 @BBC 0.0000 0.000 1.000 0.000
29 @BBC 0.4939 0.000 0.849 0.151
... ... ... ... ... ...
70 @New York Times 0.0000 0.000 1.000 0.000
71 @New York Times 0.0000 0.000 1.000 0.000
72 @New York Times 0.0000 0.000 1.000 0.000
73 @New York Times 0.0000 0.000 1.000 0.000
74 @New York Times 0.0000 0.000 1.000 0.000
75 @New York Times 0.4588 0.000 0.250 0.750
76 @New York Times 0.8807 0.000 0.595 0.405
77 @New York Times 0.5574 0.000 0.783 0.217
78 @New York Times 0.6705 0.000 0.476 0.524
79 @New York Times 0.7824 0.000 0.623 0.377
80 @New York Times 0.0000 0.000 1.000 0.000
81 @New York Times 0.0000 0.000 1.000 0.000
82 @New York Times 0.0000 0.000 1.000 0.000
83 @New York Times 0.0000 0.000 1.000 0.000
84 @New York Times 0.0000 0.000 1.000 0.000
85 @New York Times 0.0000 0.000 1.000 0.000
86 @New York Times 0.0000 0.000 1.000 0.000
87 @New York Times 0.0000 0.000 1.000 0.000
88 @New York Times 0.0000 0.000 1.000 0.000
89 @New York Times 0.0000 0.000 1.000 0.000
90 @New York Times 0.0000 0.000 1.000 0.000
91 @New York Times 0.0000 0.000 1.000 0.000
92 @New York Times 0.0000 0.000 1.000 0.000
93 @New York Times 0.0000 0.000 1.000 0.000
94 @New York Times 0.0000 0.000 1.000 0.000
95 @New York Times 0.4588 0.000 0.250 0.750
96 @New York Times 0.8807 0.000 0.595 0.405
97 @New York Times 0.5574 0.000 0.783 0.217
98 @New York Times 0.6705 0.000 0.476 0.524
99 @New York Times 0.7824 0.000 0.623 0.377

500 rows × 5 columns

# Sanity check: list the distinct outlet handles captured in the data.
news_data['Agency'].unique()
array(['@BBC', '@CBS', '@CNN', '@Fox', '@New York Times'], dtype=object)
# Scatter plot of tweet polarity vs. recency, one series per outlet.
plt.figure(figsize=(12, 8))

# Plot each outlet against its OWN tweet positions. The original reused
# the Fox rows' DataFrame index as the x values for every series, so the
# x-axis ("Tweets Ago") was wrong for the other four outlets.
series_specs = [
    ('@Fox', 'b', 'FOX'),
    ('@CNN', 'y', 'CNN'),
    ('@New York Times', 'm', 'NYT'),
    ('@BBC', 'g', 'BBC'),
    ('@CBS', 'r', 'CBS'),
]
for handle, color, label in series_specs:
    compounds = news_data[news_data['Agency'] == handle]['Compound']
    plt.scatter(range(len(compounds)), compounds,
                marker='o', color=color, s=60, label=label)

plt.gca().set(xlabel='Tweets Ago', ylabel='Tweet Polarity',
              title='Sentiment Analysis of Media Tweets')
plt.legend(loc='best', bbox_to_anchor=(1, 0.5), frameon=True)

plt.show()

png

# Mean compound sentiment per outlet. (The "% Compound" label matches the
# bar-chart axis used below; the value is a mean, not a true percentage.)
Compound_percentage = (
    news_data.groupby('Agency')['Compound']
    .mean()
    .to_frame("% Compound")
)
Compound_percentage
<style> .dataframe thead tr:only-child th { text-align: right; }
.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}
</style>
% Compound
Agency
@BBC 0.192695
@CBS 0.412080
@CNN -0.099390
@Fox 0.442165
@New York Times 0.167490
# Bar chart of mean compound sentiment per outlet.
x = Compound_percentage.index
y = Compound_percentage['% Compound']

plt.figure(figsize=(16, 8))
# Green bars for non-negative sentiment, red for negative.
colors = ['green' if _y >= 0.0 else 'red' for _y in y]
# seaborn >= 0.12 requires keyword x/y (positional args were removed).
ax = sn.barplot(x=x, y=y, palette=colors)

# Annotate each bar with its value just above the x-axis. The original
# if/else branches were identical (xy=(n, -0) equals xy=(n, 0)), so a
# single annotate call per bar suffices. The text is passed positionally
# because the `s=` keyword was removed from Axes.annotate in matplotlib 3.4.
for n, _y in enumerate(y):
    ax.annotate('{:f}'.format(_y), xy=(n, 0), ha='center', va='center',
                xytext=(0, 10), color='k', textcoords='offset points')

plt.gca().set(xlabel='News Channels', ylabel='% Tweet Polarity',
              title='Overall Media Sentiment')
plt.rc('grid', linestyle="--", color='black', linewidth=0.5)
plt.grid(True)
plt.show()

png

About

Social-Analytics

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published