made minor changes

Bensas · Jun 6, 2022 · ebc3e67 · ebc3e67
1 parent b909bf0
commit ebc3e67
Show file tree

Hide file tree

Showing 6 changed files with 24 additions and 18 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/DataAnalysis/Q3/BigramWordCloud.R b/DataAnalysis/Q3/BigramWordCloud.R
@@ -12,20 +12,25 @@ p_load(wordcloud)
 p_load(tm)
 
 ####Loading all the data
-foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
-nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
-foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
+#foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+#nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+#nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
+
+foxnews <- foxnews %>% filter(grepl("^@", foxnews$text))
+nytimes <- nytimes %>% filter(grepl("^@", nytimes$text))
 
 bigram_wc <- function(foxnews){
+  foxnews$text <- tolower(foxnews$text)
+  foxnews <- distinct(foxnews, text, .keep_all = TRUE)
   foxnews$text <- removeNumbers(foxnews$text)
   fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
                                        n=2) %>% 
     anti_join(stop_words)
   bg_fox <- fox_unn %>% 
     separate(word, c("word1", "word2"), sep=" ")
 
-  avoid_list <- c("russia", "ukraine", "user", "http", "fox", "york", "tucker")
+  avoid_list <- c("russia", "ukraine", "user", "http", "fox", "york", "news", "tucker")
   filter_bg_fox <- bg_fox %>% 
     filter(!word1 %in% stop_words$word) %>% 
     filter(!word2 %in% stop_words$word) %>% 
@@ -45,12 +50,12 @@ bigram_wc <- function(foxnews){
   return(wc)
 }
 
-fox_pos <- bigram_wc(foxnews %>% filter(label=="Positive"))
-fox_neg <- bigram_wc(foxnews %>% filter(label=="Negative"))
-fox_neu <- bigram_wc(foxnews %>% filter(label=="Neutral"))
+fox_pos <- foxnews %>% filter(label=="Positive")
+fox_neg <- foxnews %>% filter(label=="Negative")
+fox_neu <- foxnews %>% filter(label=="Neutral")
 
-nyt_pos <- bigram_wc(nytimes %>% filter(label=="Positive"))
-nyt_neg <- bigram_wc(nytimes %>% filter(label=="Negative"))
-nyt_neu <- bigram_wc(nytimes %>% filter(label=="Neutral"))
+nyt_pos <- nytimes %>% filter(label=="Positive")
+nyt_neg <- nytimes %>% filter(label=="Negative")
+nyt_neu <- nytimes %>% filter(label=="Neutral")
 
 
diff --git a/DataAnalysis/Q3/Q3LinePlot.R b/DataAnalysis/Q3/Q3LinePlot.R
@@ -5,9 +5,9 @@ p_load(plotly)
 # csv file -- change the file path here
 #foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
 #nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
-#foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
-q1 <- nytitle
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+#nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
+q1 <- foxtitle
 
 #filter out tweets that start with @
 #q1 <- q1 %>%  filter(!grepl("^@", q1$text))
@@ -46,17 +46,17 @@ p <- ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week, col=label)) +
   geom_line(lwd=1.5) +
   theme_minimal() + 
   theme(panel.background = element_blank()) +
-  ggtitle("NYTimes Average Sentiment of Tweets by Week") +
+  ggtitle("Fox News Avg Sentiment of Tweets by Week") +
   labs(x='Week', y='Frequency') +
   theme(plot.title = element_text(hjust = 0.5, size=15, face='bold', margin = margin(t = 10, r = 0 , b = 10, l = 0))) +
   theme(axis.title.x = element_text(face='bold', size=10, margin = margin(t = 10, b = 10, r = 0, l = 0))) +
   theme(axis.title.y = element_text(face='bold', size=10, margin = margin(t = 0, b = 0, r = 10, l = 10))) +
   theme(axis.text.x = element_text(angle=30, size=10)) +
   theme(axis.text.y = element_text(size=10)) +
-  theme(legend.title = element_text(face='bold', size=10)) +
+  theme(legend.title = element_text(face='bold', size=8)) +
   theme(legend.text = element_text(size=8)) +
-  scale_x_continuous(breaks=c(1,11,16,23), labels=c("Dec 24th", "Feb 24th", "April 7th", "May 24th"))
-#scale_color_manual('label', values=c('#d9534f', '#f0ad4e', '#5cb85c', '#5cb86c', '#5cb87c', '#5cb88c', '#5cb89c'))
+  scale_x_continuous(breaks=c(1,9,16,23), labels=c("Dec 24th", "Feb 24th", "April 15th", "May 24th")) +
+  scale_color_manual('label', values=c('#d9534f', '#f0ad4e', '#5cb85c', '#5cb86c', '#5cb87c', '#5cb88c', '#5cb89c'))
 ggplotly(p)
 
 

diff --git a/DataAnalysis/Q3/Q3SidebySideBarPlotSentiment.R b/DataAnalysis/Q3/Q3SidebySideBarPlotSentiment.R
@@ -135,6 +135,7 @@ ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +
   scale_fill_manual(values=c("#fc4949", "#1a94eb"))
 
 ###############################################################################
+#X-squared = 0.35763, df = 2, p-value = 0.8363
 
 
 

diff --git a/SentimentAnalysis/.DS_Store b/SentimentAnalysis/.DS_Store
diff --git a/SentimentAnalysis/data/.DS_Store b/SentimentAnalysis/data/.DS_Store
Original file line number	Diff line number	Diff line change
Expand Up		@@ -135,6 +135,7 @@ ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +
		scale_fill_manual(values=c("#fc4949", "#1a94eb"))

		###############################################################################
		#X-squared = 0.35763, df = 2, p-value = 0.8363



Expand Down