Skip to content

Commit

Permalink
added two more files for word clouds
Browse files Browse the repository at this point in the history
  • Loading branch information
sheikhshafayat committed May 26, 2022
1 parent 30bc1dc commit a42cd4d
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 45 deletions.
55 changes: 12 additions & 43 deletions DataAnalysis/Q3/Q3RAnalysisSheikh.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
#reading data
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
######## preprocess everything

nytimes <- nytimes %>% select(Date, text, label, score)
Expand All @@ -23,7 +24,6 @@ foxnews$text <- tolower(foxnews$text)

######### This code is copied from Juan's code ##########


# create YearMonth column
foxnews$YearMonth <- substr(foxnews$Date, 1,7)
nytimes$YearMonth <- substr(nytimes$Date, 1,7)
Expand Down Expand Up @@ -92,16 +92,18 @@ foxTotal <- melt(foxTotal) %>% rename(c(Sentiment = Var1, Percentage=value)) %>%
nyTotal <- melt(nyTotal) %>% rename(c(Sentiment = Var1, Percentage=value)) %>% mutate(Source="NYT")
allSent <- rbind(foxTotal, nyTotal)

gfox1 <- ggplot(foxTotal, aes(x="", y=Percentage, fill=Sentiment)) +
ggplot(foxTotal, aes(x="", y=Percentage, fill=Sentiment)) +
geom_bar(stat="identity", width = 1, color="white") +
coord_polar("y", start = 0) +
theme(legend.position="none", panel.background = element_blank())

ggtitle("Fox News Tweets and Replies' Sentiment") +
theme(panel.background = element_blank())

gnyt1 <- ggplot(nyTotal, aes(x="", y=Percentage, fill=Sentiment)) +
ggplot(nyTotal, aes(x="", y=Percentage, fill=Sentiment)) +
geom_bar(stat="identity", width = 1, color="white") +
coord_polar("y", start = 0) +
theme_void()
ggtitle("NYT Tweets and Replies' Sentiment") +
theme(panel.background = element_blank())


gfox1
gnyt1
Expand All @@ -121,44 +123,11 @@ gnyt2 <- ggplot(nyTotal, aes(x=Sentiment, y=Percentage, fill=Sentiment)) +
ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +
geom_bar(stat="identity", width = 1, position = position_dodge()) +
theme(panel.background = element_blank()) +
ggtitle("Media Outlet Tweet and Reply Sentiment Comparison") +
scale_fill_manual(values=c("#fc4949", "#1a94eb"))



############## Filtering out war related tweets####################
#we look for the following words
warwords <- c("invasion", "ukraine", "russia", "war", "putin", "zelensky", "nato", "eu", "invade")

# Decide whether a tweet mentions the war.
#
# text: a single character string (one tweet).
# Returns TRUE when any whole word of `text` is one of `warwords`, else FALSE.
#
# Every word of the tweet is checked, not just the first one. The text is
# lower-cased and split on runs of non-alphanumeric characters so that
# punctuation or a hashtag sign attached to a keyword ("ukraine,", "#nato")
# does not hide the match. NA and empty input yield FALSE.
warTweet <- function(text) {
  words <- unlist(strsplit(tolower(text), "[^[:alnum:]]+"))
  any(words %in% warwords)
}
# Keep only the rows whose text is flagged by warTweet(). vapply() pins the
# result to exactly one logical per row, so a data frame with zero rows yields
# logical(0) instead of the empty list that sapply() would hand to filter().
warfox <- foxnews %>%
  filter(vapply(text, warTweet, logical(1), USE.NAMES = FALSE))
warnyt <- nytimes %>%
  filter(vapply(text, warTweet, logical(1), USE.NAMES = FALSE))

######Plotting similar graph for war tweets:
foxTotal2 <- table(warfox$label)/length(warfox$label) * 100
nyTotal2 <- table(warnyt$label)/length(warnyt$label) * 100
#positive and negative newstitles
foxTotal2 <- melt(foxTotal2) %>% rename(c(Sentiment = Var1, Percentage=value)) %>% mutate(Source="FoxNews")
nyTotal2 <- melt(nyTotal2) %>% rename(c(Sentiment = Var1, Percentage=value)) %>% mutate(Source="NYT")
allSent2 <- rbind(foxTotal2, nyTotal2)
ggplot(allSent2, aes(x=Sentiment, y=Percentage, fill=Source)) +
geom_bar(stat="identity", width = 1, position = position_dodge()) +
theme(panel.background = element_blank()) +
scale_fill_manual(values=c("#fc4949", "#1a94eb"))

#to check the size of the dataframes. unfortunately they are quite small
dim(warfox)
dim(warnyt)

#########Plotting the time series of sentiment########
ggplot(foxnews, aes(x=Date, y=label)) + geom_line()




128 changes: 128 additions & 0 deletions DataAnalysis/Q3/Q3TitleAnalysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
library(pacman) #my package manager

#load necessary packages
p_load(ggplot2)
p_load(dplyr)
p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
#reading data


# Load the sentiment-scored CSV for each outlet (the paths are machine-specific).
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

######## preprocess everything

# Keep the four analysis columns, drop everything after the first space in
# Date before parsing it as a Date, and lower-case the text.
clean_titles <- function(raw) {
  raw %>%
    select(Date, text, label, score) %>%
    mutate(
      Date = as.Date(sub(" .*", "", Date), format = "%Y-%m-%d", tz = "UTC"),
      text = tolower(text)
    )
}

nytitle <- clean_titles(nytitle)
foxtitle <- clean_titles(foxtitle)

######### This code is copied from Juan's code ##########

# Add the two period columns used for the aggregations and sort by date:
#   YearMonth - first seven characters of the date ("YYYY-MM")
#   Week      - integer index of the 1-week bin the date falls into
# NOTE(review): the week index is computed separately for each outlet, so the
# two outlets only share week numbers if their date ranges start in the same
# week - confirm against the data.
foxtitle <- foxtitle %>%
  mutate(
    YearMonth = substr(Date, 1, 7),
    Week = cut.Date(Date, breaks = "1 week", labels = FALSE)
  ) %>%
  arrange(Date)

nytitle <- nytitle %>%
  mutate(
    YearMonth = substr(Date, 1, 7),
    Week = cut.Date(Date, breaks = "1 week", labels = FALSE)
  ) %>%
  arrange(Date)

# Share of each sentiment label within every period of one outlet.
#   titles - data frame with a `label` column and the period column
#   period - unquoted name of the period column (Week or YearMonth)
#   outlet - value written into the `source` column
# Returns one row per (period, label) with the row count `cnt` and `freq`,
# the count divided by the period's total, rounded to three decimals.
sentiment_share <- function(titles, period, outlet) {
  titles %>%
    group_by({{ period }}, label) %>%
    summarise(cnt = n()) %>%
    mutate(freq = round(cnt / sum(cnt), 3)) %>%
    arrange({{ period }}) %>%
    mutate(source = outlet)
}

# sentiment share by week
sentiment_by_week_fox <- sentiment_share(foxtitle, Week, "foxtitle")
sentiment_by_week_nytitle <- sentiment_share(nytitle, Week, "nytitle")
sentiment_by_week_combined <- rbind(sentiment_by_week_fox, sentiment_by_week_nytitle)

# sentiment share by month
sentiment_by_month_fox <- sentiment_share(foxtitle, YearMonth, "foxtitle")
sentiment_by_month_nytitle <- sentiment_share(nytitle, YearMonth, "nytitle")
sentiment_by_month_combined <- rbind(sentiment_by_month_fox, sentiment_by_month_nytitle)


#####################################
###Plotting graphs
# Dodged bar chart of the weekly sentiment frequencies, coloured by outlet.
# NOTE(review): the fill aesthetic is `source`, yet the legend is titled
# 'label' and three colours are supplied - this looks like a leftover from a
# per-label version of the plot; confirm the intended mapping.
ggplot(sentiment_by_week_combined, aes(x = Week, y = freq, fill = source)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  ggtitle("Average Sentiment of Tweets by Week") +
  labs(x = 'Week', y = 'Frequency') +
  # Week indices 1, 11, 16 and 23 are shown as calendar dates on the axis.
  scale_x_continuous(
    breaks = c(1, 11, 16, 23),
    labels = c("Dec 24th", "Feb 24th", "April 7th", "May 24th")
  ) +
  scale_fill_manual('label', values = c('#d9534f', '#f0ad4e', '#5cb85c')) +
  theme_minimal() +
  theme(
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 20, face = 'bold',
      margin = margin(t = 10, r = 0, b = 10, l = 0)
    ),
    axis.title.x = element_text(
      face = 'bold', size = 15,
      margin = margin(t = 10, b = 10, r = 0, l = 0)
    ),
    axis.title.y = element_text(
      face = 'bold', size = 15,
      margin = margin(t = 0, b = 0, r = 10, l = 10)
    ),
    axis.text.x = element_text(angle = 30, size = 13),
    axis.text.y = element_text(size = 13),
    legend.title = element_text(face = 'bold', size = 15),
    legend.text = element_text(size = 15)
  )


#### Overall sentiment split of each outlet

# Percentage of rows carrying each sentiment label, as a long data frame with
# the columns Sentiment, Percentage and Source.
#   titles - data frame with a `label` column
#   outlet - value written into the Source column
overall_sentiment <- function(titles, outlet) {
  pct <- table(titles$label) / length(titles$label) * 100
  melt(pct) %>%
    rename(c(Sentiment = Var1, Percentage = value)) %>%
    mutate(Source = outlet)
}

foxTotal <- overall_sentiment(foxtitle, "foxtitle")
nyTotal <- overall_sentiment(nytitle, "NYT")
allSent <- rbind(foxTotal, nyTotal)

# Pie charts of the overall sentiment split, one per outlet. A pie is drawn as
# a single stacked bar (x = "") wrapped into polar coordinates along y.
# The plots are stored in gfox1 / gnyt1 so that the two display calls below
# refer to objects that actually exist in this script.
gfox1 <- ggplot(foxTotal, aes(x = "", y = Percentage, fill = Sentiment)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y", start = 0) +
  ggtitle("Fox News Title Average Sentiment") +
  theme(panel.background = element_blank())

gnyt1 <- ggplot(nyTotal, aes(x = "", y = Percentage, fill = Sentiment)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y", start = 0) +
  ggtitle("NYT Title Average Sentiment") +
  theme(panel.background = element_blank())

# display both pies
gfox1
gnyt1

# barplot
# One bar per sentiment label for a single outlet; the legend is hidden
# because the x axis already names each label.
sentiment_bars <- function(totals) {
  ggplot(totals, aes(x = Sentiment, y = Percentage, fill = Sentiment)) +
    geom_bar(stat = "identity", width = 1, color = "white") +
    theme(legend.position = "none", panel.background = element_blank())
}

gfox2 <- sentiment_bars(foxTotal)
gnyt2 <- sentiment_bars(nyTotal)

# Both outlets in one graph: for every sentiment label, the two outlets'
# percentages are drawn as side-by-side (dodged) bars.
# The colours are named after the Source values so each outlet always gets
# the same colour; unnamed values would be assigned in sort order of the
# Source strings, which differs between locales for "foxtitle" vs "NYT".
ggplot(allSent, aes(x = Sentiment, y = Percentage, fill = Source)) +
  geom_bar(stat = "identity", width = 1, position = position_dodge()) +
  theme(panel.background = element_blank()) +
  ggtitle("Media Tweet Sentiment Comparison") +
  scale_fill_manual(values = c(foxtitle = "#fc4949", NYT = "#1a94eb"))
9 changes: 7 additions & 2 deletions DataAnalysis/Q3/Q3WordCloudPosFoxNews.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# Q1 wordcloud
library(pacman)
p_load(wordcloud)
p_load(tm)
p_load(dplyr)
p_load(ggplot2)

# source file -- change the file path here
fox_cloud <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")

positive <- fox_cloud[fox_cloud$label == 'Neutral',]
positive <- fox_cloud[fox_cloud$label == 'Positive',]

# remove non-ascii words
positive$text <- stringi::stri_trans_general(positive$text, "latin-ascii")
Expand All @@ -17,7 +22,7 @@ docs <- docs %>%
tm_map(stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, removeWords, c("russia", "ukraine", "user")) # remove "Russia" and "Ukraine"
docs <- tm_map(docs, removeWords, c("russia", "ukraine", "user", "http")) # remove "russia" and "ukraine" plus the "user" and "http" tokens

# create matrix
dtm <- TermDocumentMatrix(docs)
Expand Down

0 comments on commit a42cd4d

Please sign in to comment.