
Commit 0901245: Fixed conflicts
Bensas committed May 26, 2022
2 parents 36a7e97 + baa18d0
Showing 19 changed files with 194 additions and 43 deletions.
Binary file modified DataAnalysis/Q1/Q1_sentiment_weekly.png
Binary file modified DataAnalysis/Q1/negativetweets_wordcloud.png
Binary file modified DataAnalysis/Q1/neutraltweets_wordcloud.png
Binary file modified DataAnalysis/Q1/positivetweets_wordcloud.png
2 changes: 1 addition & 1 deletion DataAnalysis/Q1/q1_analysis.R
@@ -52,7 +52,7 @@ ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week)) +
theme(axis.text.y = element_text(size=13)) +
theme(legend.title = element_text(face='bold', size=15)) +
theme(legend.text = element_text(size=15)) +
- scale_x_continuous(breaks=c(1,11,16,23), labels=c("Dec 24th", "Feb 24th", "April 7th", "May 24th")) +
+ scale_x_continuous(breaks=c(1,4,11,16,23), labels=c("Dec 24th", "Jan 14th", "Feb 24th", "April 7th", "May 24th")) +
scale_fill_manual('label', values=c('#d9534f', '#f0ad4e', '#5cb85c'))

# graph (by month)
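The hunk begins mid-call, so the construction of sentiment_by_week is not visible here. For orientation, a minimal dplyr sketch of how such a per-week frequency table could be assembled, assuming the tweet data carries Week and label columns (names inferred from the aes() mapping above; tweets is a hypothetical input frame, not the repository's actual code):

library(dplyr)

# Hypothetical input: one row per tweet, with a numeric Week index and a
# sentiment label ("Positive" / "Neutral" / "Negative").
sentiment_by_week <- tweets %>%
  count(Week, label, name = "n") %>%
  group_by(Week) %>%
  mutate(freq = n / sum(n)) %>%   # share of each sentiment within the week
  ungroup()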
12 changes: 6 additions & 6 deletions DataAnalysis/Q2/q2_before_after_russian_tweets.R
@@ -70,20 +70,20 @@ plot_barplot_before_after(q2_nato_russian, "Sentiment of russian tweets containi

# English

- q2_zelensky_english <- read.csv('UkraineConflictOnTwitter/SentimentAnalysis/data/q2/zelensky_english_with_sentiment.csv')
+ q2_zelensky_english <- read.csv('/Users/Bensas/ITBA/Intercambios/KAIST/Data\ Science\ Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q2/zelensky_english_with_sentiment.csv')
conduct_chisq_before_after(q2_zelensky_english)
plot_barplot_before_after(q2_zelensky_english, "Sentiment of english tweets containing \"Zelensky\"")
- # p-value for 1 week before/after = 7.735e-14 (significant)
+ # p-value for 1 week before/after = 2.153e-11 (significant)

- q2_putin_english <- read.csv('UkraineConflictOnTwitter/SentimentAnalysis/data/q2/putin_english_with_sentiment.csv')
+ q2_putin_english <- read.csv('/Users/Bensas/ITBA/Intercambios/KAIST/Data\ Science\ Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q2/putin_english_with_sentiment.csv')
conduct_chisq_before_after(q2_putin_english)
plot_barplot_before_after(q2_putin_english, "Sentiment of english tweets containing \"Putin\"")
- # p-value for 1 week before/after = 0.1959 (insignificant)
+ # p-value for 1 week before/after = 0.01354 (significant)

- q2_nato_english <- read.csv('UkraineConflictOnTwitter/SentimentAnalysis/data/q2/nato_english_with_sentiment.csv')
+ q2_nato_english <- read.csv('/Users/Bensas/ITBA/Intercambios/KAIST/Data\ Science\ Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q2/nato_english_with_sentiment.csv')
conduct_chisq_before_after(q2_nato_english)
plot_barplot_before_after(q2_nato_english, "Sentiment of english tweets containing \"NATO\"")
- # p-value for 1 week before/after = 0.1518 (insignificant)
+ # p-value for 1 week before/after = 0.04421 (significant)
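conduct_chisq_before_after and plot_barplot_before_after are defined earlier in this file and are not shown in the hunk. For orientation, a minimal sketch of what a before/after chi-squared comparison could look like; the invasion-date cut-off and the date/label column names are assumptions, not the repository's implementation:

# Compare the sentiment distribution one week before vs. one week after an event.
conduct_chisq_before_after_sketch <- function(df, event_date = as.Date("2022-02-24")) {
  df$date <- as.Date(df$date)
  before <- df[df$date >= event_date - 7 & df$date < event_date, ]
  after  <- df[df$date >= event_date & df$date < event_date + 7, ]
  sentiment_levels <- c("Negative", "Neutral", "Positive")
  counts <- rbind(
    Before = table(factor(before$label, levels = sentiment_levels)),
    After  = table(factor(after$label,  levels = sentiment_levels))
  )
  chisq.test(counts)  # small p-value => the two distributions differ
}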



58 changes: 58 additions & 0 deletions DataAnalysis/Q3/BigramWordCloud.R
@@ -0,0 +1,58 @@
library(pacman) #my package manager

#load necessary packages
p_load(ggplot2)
p_load(dplyr)
p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)
p_load(wordcloud)
p_load(tm)

####Loading all the data
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

bigram_wc <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=2) %>%
anti_join(stop_words)
bg_fox <- fox_unn %>%
separate(word, c("word1", "word2"), sep=" ")

avoid_list <- c("russia", "ukraine", "user", "http", "fox", "york")
filter_bg_fox <- bg_fox %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
filter(!word1 %in% avoid_list) %>%
filter(!word2 %in% avoid_list)

count_bg <- filter_bg_fox %>%
group_by(word1, word2) %>%
tally(sort = TRUE)

count_bg <- as.data.frame(count_bg)

count_bg$bigram <- paste(count_bg$word1, count_bg$word2, sep=" ")
wc <- wordcloud(words = count_bg$bigram, freq = count_bg$n, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))

return(wc)
}

fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

# Generate a bigram wordcloud for each sentiment subset
bigram_wc(fox_pos)
bigram_wc(fox_neg)
bigram_wc(fox_neu)
bigram_wc(nyt_pos)
bigram_wc(nyt_neg)
bigram_wc(nyt_neu)
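The commit also adds rendered images such as Bigram_Fox_Pos.png and Bigram_fox_neg.png; they were presumably produced by wrapping the calls above in a graphics device. A minimal sketch, with output size and file names chosen for illustration:

# Write one bigram wordcloud per subset to disk.
png("Bigram_Fox_Pos.png", width = 800, height = 800)
bigram_wc(fox_pos)
dev.off()

png("Bigram_fox_neg.png", width = 800, height = 800)
bigram_wc(fox_neg)
dev.off()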
Binary file added DataAnalysis/Q3/Bigram_Fox_Pos.png
Binary file added DataAnalysis/Q3/Bigram_fox_neg.png
48 changes: 48 additions & 0 deletions DataAnalysis/Q3/Cooccurance.R
@@ -0,0 +1,48 @@
library(pacman) #my package manager

#load necessary packages
p_load(ggplot2)
p_load(dplyr)
p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)

####Loading all the data
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

coocc_func <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=2) %>%
anti_join(stop_words)
bg_fox <- fox_unn %>%
separate(word, c("word1", "word2"), sep=" ")

avoid_list <- c("russia", "ukraine", "user", "http")
filter_bg_fox <- bg_fox %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
filter(!word1 %in% avoid_list) %>%
filter(!word2 %in% avoid_list)

count_bg <- filter_bg_fox %>%
group_by(word1, word2) %>%
tally(sort = TRUE)
return(count_bg)
}
fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

str(coocc_func(foxnews))  # inspect the structure of the bigram count table
df <- coocc_func(nytimes)
df$bigram <- paste(df$word1, df$word2, sep=" ")
head(df)
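The committed figures Fox_all_bigrams.png and NYTimes_all_bigrams_top.png suggest plots of the most frequent bigrams; the plotting code itself is not part of this commit. A minimal ggplot2 sketch that could produce such a chart from coocc_func()'s output (illustrative only):

library(dplyr)
library(ggplot2)

# Top 20 bigrams by count for the Fox News feed.
top_bigrams <- coocc_func(foxnews) %>%
  ungroup() %>%
  mutate(bigram = paste(word1, word2)) %>%
  slice_max(order_by = n, n = 20)

ggplot(top_bigrams, aes(x = reorder(bigram, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(x = NULL, y = "Bigram count")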
Binary file added DataAnalysis/Q3/Fox_all_bigrams.png
45 changes: 45 additions & 0 deletions DataAnalysis/Q3/MakeWordCloud.R
@@ -0,0 +1,45 @@
# Q3 wordcloud
library(pacman)
p_load(wordcloud)
p_load(tm)
p_load(dplyr)
p_load(ggplot2)

# source file -- change the file path here
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

make_cloud <- function(dataset, sentiment){
positive <- dataset[dataset$label == sentiment,]

# remove non-ascii words
positive$text <- stringi::stri_trans_general(positive$text, "latin-ascii")
positive$text <- gsub("[^\x01-\x7F]", "", positive$text)

# create corpus and preprocess data
docs <- Corpus(VectorSource(positive$text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, removeWords, c("russia", "ukraine", "user", "http")) # remove "Russia" and "Ukraine"

# create matrix
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df <- data.frame(word = names(words),freq=words)

# create wordcloud
set.seed(1234)
wc <- wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))
return(wc)

}

make_cloud(foxnews, "Neutral")
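A short usage sketch that renders one cloud per sentiment for both sources and writes each to its own PNG; the file-naming scheme is an assumption:

sources <- list(fox = foxnews, nyt = nytimes)
for (src_name in names(sources)) {
  for (sentiment in c("Positive", "Negative", "Neutral")) {
    png(sprintf("%s_%s_wordcloud.png", src_name, tolower(sentiment)),
        width = 800, height = 800)
    make_cloud(sources[[src_name]], sentiment)
    dev.off()
  }
}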
Binary file added DataAnalysis/Q3/NYTimes_all_bigrams_top.png
36 changes: 36 additions & 0 deletions DataAnalysis/Q3/Q3RAnalysisSheikh.R
@@ -7,6 +7,8 @@ p_load(dplyr)
p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)
#reading data
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
@@ -130,4 +132,38 @@ ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +



############################################
# co-occurrence score

coocc_func <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=2) %>%
anti_join(stop_words)
bg_fox <- fox_unn %>%
separate(word, c("word1", "word2"), sep=" ")

avoid_list <- c("russia", "ukraine", "user", "http")
filter_bg_fox <- bg_fox %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
filter(!word1 %in% avoid_list) %>%
filter(!word2 %in% avoid_list)

count_bg <- filter_bg_fox %>%
group_by(word1, word2) %>%
tally(sort = TRUE)
return(count_bg)
}
fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

coocc_func(foxnews)
coocc_func(nytimes)
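One way to read the co-occurrence scores across outlets is to join the two count tables on the bigram; this comparison is an illustration and is not part of the committed analysis:

fox_bg <- coocc_func(foxnews) %>% ungroup() %>% rename(fox_n = n)
nyt_bg <- coocc_func(nytimes) %>% ungroup() %>% rename(nyt_n = n)

# Bigrams that appear in both feeds, ranked by combined frequency.
shared <- inner_join(fox_bg, nyt_bg, by = c("word1", "word2")) %>%
  mutate(total = fox_n + nyt_n) %>%
  arrange(desc(total))

head(shared, 20)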



36 changes: 0 additions & 36 deletions DataAnalysis/Q3/Q3WordCloudPosFoxNews.R

This file was deleted.

Binary file added DataAnalysis/Q3/bigram_Nytimes_positive.png
Binary file added DataAnalysis/Q3/bigram_fox_news_neutral.png
Binary file added DataAnalysis/Q3/bigram_nyt_negative.png
Binary file added DataAnalysis/Q3/bigram_nyt_neutral_.png
