Skip to content

Commit

Permalink
done some more analysis and polished some codes
Browse files Browse the repository at this point in the history
  • Loading branch information
sheikhshafayat committed May 27, 2022
1 parent 0901245 commit cfd2c15
Show file tree
Hide file tree
Showing 9 changed files with 315 additions and 1,275 deletions.
5 changes: 1 addition & 4 deletions DataAnalysis/Q3/BigramWordCloud.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,4 @@ nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

type(coocc_func(foxnews))
df <- coocc_func(nytimes)
df$bigram <- paste(df$word1, df$word2, sep=" ")
head(df)
filter_by_date <-
11 changes: 11 additions & 0 deletions DataAnalysis/Q3/Cooccurance.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Meth
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
####pre-processing#####
# For each outlet: keep only the columns used below, cut the Date string at
# its first space (dropping whatever follows) and parse the remainder as a
# Date, then lower-case the text.
nytimes <- nytimes %>%
  select(Date, text, label, score) %>%
  mutate(
    Date = as.Date(sub(" .*", "", Date), format="%Y-%m-%d", tz="UTC"),
    text = tolower(text)
  )

foxnews <- foxnews %>%
  select(Date, text, label, score) %>%
  mutate(
    Date = as.Date(sub(" .*", "", Date), format="%Y-%m-%d", tz="UTC"),
    text = tolower(text)
  )
##########

coocc_func <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
Expand Down Expand Up @@ -42,6 +51,8 @@ nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

# Rows of `foxnews` dated strictly after 2022-01-05 and strictly before
# 2022-01-20 (both bounds exclusive). The bounds are converted with as.Date()
# so the comparison against the Date column is explicit rather than relying on
# implicit character-to-Date coercion.
int_point <- foxnews %>%
  filter(Date > as.Date("2022-01-05"), Date < as.Date("2022-01-20"))

# Inspect what kind of object coocc_func() returns.
# `type()` is not a base R function, so the original call stopped with
# "could not find function"; class() is the base equivalent for this check.
# NOTE(review): if a `type()` helper is defined elsewhere in this project,
# confirm that class() gives the information that was wanted here.
class(coocc_func(foxnews))

# Run the co-occurrence helper on the NY Times data and add a single
# space-separated "word1 word2" column.
df <- coocc_func(nytimes)
df$bigram <- paste(df$word1, df$word2, sep=" ")
Expand Down
Binary file added DataAnalysis/Q3/FoxNewsTitleSentimentByWeek.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion DataAnalysis/Q3/MakeWordCloud.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Met
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

make_cloud <- function(dataset, sentiment){
positive <- fox_cloud[fox_cloud$label == sentiment,]
positive <- dataset[dataset$label == sentiment,]

# remove non-ascii words
positive$text <- stringi::stri_trans_general(positive$text, "latin-ascii")
Expand Down Expand Up @@ -43,3 +43,4 @@ make_cloud <- function(dataset, sentiment){
}

make_cloud(foxnews, "Neutral")

Binary file added DataAnalysis/Q3/NYTimesTitleSentiment.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
57 changes: 57 additions & 0 deletions DataAnalysis/Q3/Q3LinePlot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
library(ggplot2)
library(dplyr)

# Input data -- change the file path here.
# Only one dataset is read at a time; the alternatives are kept commented out
# so they can be swapped in by hand.
#q1 <- read.csv('/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q1/all_tweets_emotions_with_sentiment.csv')
#foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
#nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
#nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
# `q1` is the working data frame used by the rest of this script, so switching
# datasets only requires changing the assignment below.
# NOTE(review): the plot title further down is hard-coded; update it to match
# whichever dataset is assigned here.
q1 <- foxtitle


## line plot of sentiment share over time ##

# Convert the Date string to a Date: keep only the part before the first
# space, then parse it as year-month-day.
q1$Date <- sub(" .*", "", q1$Date)
q1$Date <- as.Date(q1$Date, format="%Y-%m-%d", tz="UTC")

# YearMonth: "YYYY-MM" key used for the monthly aggregation.
q1$YearMonth <- format(q1$Date, "%Y-%m")

# Week: integer index of the 1-week bin each Date falls into
# (labels = FALSE makes cut() return bin numbers instead of a factor).
# The pipeline refers to the piped data's own `Date` column rather than the
# global `q1$Date`, so it stays correct if a step is ever added upstream, and
# it goes through the cut() generic instead of calling the S3 method directly.
q1 <- q1 %>%
  mutate(Week = cut(Date, breaks = "1 week", labels = FALSE)) %>%
  arrange(Date)

# Share of each sentiment label within every week.
# summarise() counts rows per (Week, label); `.groups = "drop_last"` states
# explicitly that the result stays grouped by Week, so sum(cnt) in mutate() is
# the weekly total and freq is the label's share of that week (3 decimals).
# The result is ungrouped before it is handed on.
sentiment_by_week <- q1 %>%
  group_by(Week, label) %>%
  summarise(cnt = n(), .groups = "drop_last") %>%
  mutate(freq = round(cnt / sum(cnt), 3)) %>%
  ungroup() %>%
  arrange(Week)

# Share of each sentiment label within every calendar month; same scheme as
# above with YearMonth as the period.
sentiment_by_month <- q1 %>%
  group_by(YearMonth, label) %>%
  summarise(cnt = n(), .groups = "drop_last") %>%
  mutate(freq = round(cnt / sum(cnt), 3)) %>%
  ungroup() %>%
  arrange(YearMonth)

# Weekly line chart: one line per sentiment label, x = week index,
# y = that label's share of the week.
# NOTE(review): the title says "NY Times" while `q1` is currently loaded from
# the Fox News titles file -- confirm and adjust when switching datasets.
# NOTE(review): the x-axis breaks/labels are hard-coded week indices; verify
# they still line up with the dates in the loaded data.
ggplot(sentiment_by_week, aes(x=Week, y=freq, col=label, fill=label)) +
  geom_line(lwd=1.5) +
  scale_x_continuous(
    breaks=c(1,11,16,23),
    labels=c("Dec 24th", "Feb 24th", "April 7th", "May 24th")
  ) +
  ggtitle("NY Times Average Emotions of Tweets by Week") +
  labs(x="Week", y="Frequency") +
  theme_minimal() +
  theme(
    panel.background = element_blank(),
    plot.title = element_text(hjust = 0.5, size=15, face="bold",
                              margin = margin(t = 10, r = 0, b = 10, l = 0)),
    axis.title.x = element_text(face="bold", size=10,
                                margin = margin(t = 10, b = 10, r = 0, l = 0)),
    axis.title.y = element_text(face="bold", size=10,
                                margin = margin(t = 0, b = 0, r = 10, l = 10)),
    axis.text.x = element_text(angle=30, size=10),
    axis.text.y = element_text(size=10),
    legend.title = element_text(face="bold", size=10),
    legend.text = element_text(size=8)
  )
#scale_color_manual('label', values=c('#d9534f', '#f0ad4e', '#5cb85c', '#5cb86c', '#5cb87c', '#5cb88c', '#5cb89c'))

8 changes: 5 additions & 3 deletions DataAnalysis/Q3/Q3RAnalysisSheikh.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#This file is a mess. I only used this to make the graphs

#Research Question 3
library(pacman) #my package manager

Expand Down Expand Up @@ -87,6 +89,7 @@ ggplot(sentiment_by_week_combined, aes(fill=source, y=freq, x=Week)) +


####Checking the overall sentiment of the news

# Percentage of each sentiment label per outlet: label counts from table()
# divided by the number of label entries, times 100.
# NOTE(review): table() omits NA by default while length() counts them, so the
# percentages sum to less than 100 if any label is missing -- confirm the
# label column has no NAs.
foxTotal <- table(foxnews$label)/length(foxnews$label) * 100
nyTotal <- table(nytimes$label)/length(nytimes$label) * 100

Expand All @@ -107,9 +110,6 @@ ggplot(nyTotal, aes(x="", y=Percentage, fill=Sentiment)) +
theme(panel.background = element_blank())


gfox1
gnyt1

#barplot
gfox2 <- ggplot(foxTotal, aes(x=Sentiment, y=Percentage, fill=Sentiment)) +
geom_bar(stat="identity", width = 1, color="white") +
Expand All @@ -128,6 +128,8 @@ ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +
ggtitle("Media Outlet Tweet and Reply Sentiment Comparison") +
scale_fill_manual(values=c("#fc4949", "#1a94eb"))

###############################################################################




Expand Down
Binary file added DataAnalysis/Q3/Weekly Ukraine Related Tweets.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit cfd2c15

Please sign in to comment.