
Commit

added a readme file for my work and reorganized files
sheikhshafayat committed Jun 4, 2022
1 parent 85c3ba9 commit 6f2bb53
Showing 39 changed files with 477,332 additions and 58 deletions.
Binary file modified .DS_Store
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
8 changes: 4 additions & 4 deletions DataAnalysis/Q3/MakeWordCloud.R
@@ -6,10 +6,10 @@ p_load(dplyr)
p_load(ggplot2)

# source file -- change the file path here
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")

make_cloud <- function(dataset, sentiment){
positive <- dataset[dataset$label == sentiment,]
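The rest of make_cloud is collapsed in this view. As a rough sketch only — the corpus-building and plotting steps below are assumptions, not the committed code — a continuation using the wordcloud and tm packages loaded in the sibling scripts might look like:

```r
# Hypothetical sketch of the collapsed body of make_cloud; not the committed code.
make_cloud <- function(dataset, sentiment){
  positive <- dataset[dataset$label == sentiment,]
  # build a corpus from the filtered tweets and strip English stop words
  corpus <- tm::Corpus(tm::VectorSource(positive$text))
  corpus <- tm::tm_map(corpus, tm::content_transformer(tolower))
  corpus <- tm::tm_map(corpus, tm::removeWords, tm::stopwords("en"))
  # rank terms by frequency and draw the cloud
  tdm <- tm::TermDocumentMatrix(corpus)
  freqs <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)
  wordcloud::wordcloud(names(freqs), freqs,
                       max.words = 100, random.order = FALSE)
}
```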
25 changes: 16 additions & 9 deletions DataAnalysis/Q3/Q3LinePlot.R
@@ -1,16 +1,16 @@
library(ggplot2)
library(dplyr)

+library(pacman)
+p_load(plotly)
# csv file -- change the file path here
-#q1 <- read.csv('/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q1/all_tweets_emotions_with_sentiment.csv')
-foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
-nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
-#foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-#nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
-q1 <- nytimes
+#foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+#nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+#foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
+q1 <- nytitle

#filter tweets that start with @
-q1 <- q1 %>% filter(!grepl("^@", q1$text))
+#q1 <- q1 %>% filter(!grepl("^@", q1$text))


## stacked bar plot ##
@@ -42,7 +42,7 @@ sentiment_by_month <- q1 %>%
arrange(YearMonth)

# graph (by week)
-ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week, col=label)) +
+p <- ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week, col=label)) +
geom_line(lwd=1.5) +
theme_minimal() +
theme(panel.background = element_blank()) +
@@ -57,4 +57,11 @@ ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week, col=label)) +
theme(legend.text = element_text(size=8)) +
scale_x_continuous(breaks=c(1,11,16,23), labels=c("Dec 24th", "Feb 24th", "April 7th", "May 24th"))
#scale_color_manual('label', values=c('#d9534f', '#f0ad4e', '#5cb85c', '#5cb86c', '#5cb87c', '#5cb88c', '#5cb89c'))
+ggplotly(p)
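The aggregation that builds sentiment_by_week is collapsed above. Judging from the columns the plot maps (Week, freq, label), a minimal dplyr sketch could look like this — the week-number computation is an assumption, not the committed code:

```r
# Hypothetical sketch of the weekly aggregation feeding the line plot.
# Assumes q1$Date is already a Date column; the week indexing is illustrative.
sentiment_by_week <- q1 %>%
  mutate(Week = as.integer(difftime(Date, min(Date, na.rm = TRUE),
                                    units = "weeks")) + 1) %>%
  group_by(Week, label) %>%
  summarise(freq = n(), .groups = "drop") %>%
  arrange(Week)
```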







@@ -11,13 +11,16 @@ p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)

#reading data
-foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
-nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
-foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

+foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
####### preprocess everything


nytimes <- nytimes %>% select(Date, text, label, score)
nytimes$Date <- sub(" .*", "", nytimes$Date) %>% as.Date(format="%Y-%m-%d", tz="UTC")
nytimes$text <- tolower(nytimes$text)
@@ -138,37 +141,3 @@ ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +


############################################
#co-occurrence score

coocc_func <- function(foxnews){
  # split the tweet text into bigrams
  fox_unn <- foxnews %>%
    unnest_tokens(word, text, token = "ngrams", n = 2) %>%
    anti_join(stop_words)
  # separate each bigram into its two component words
  bg_fox <- fox_unn %>%
    separate(word, c("word1", "word2"), sep = " ")

  # drop stop words and corpus-dominant terms from either position
  avoid_list <- c("russia", "ukraine", "user", "http")
  filter_bg_fox <- bg_fox %>%
    filter(!word1 %in% stop_words$word) %>%
    filter(!word2 %in% stop_words$word) %>%
    filter(!word1 %in% avoid_list) %>%
    filter(!word2 %in% avoid_list)

  # count each remaining word pair, most frequent first
  count_bg <- filter_bg_fox %>%
    group_by(word1, word2) %>%
    tally(sort = TRUE)
  return(count_bg)
}
fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

coocc_func(foxnews)
coocc_func(nytimes)
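The per-sentiment subsets defined above are never passed to coocc_func in this hunk; illustrative calls (not in the committed script) would look like:

```r
# Illustrative only: co-occurrence counts within one sentiment class
coocc_func(fox_pos)
coocc_func(nyt_neg)
```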



6 changes: 4 additions & 2 deletions DataAnalysis/Q3/Q3TitleAnalysis.R
@@ -9,8 +9,10 @@ p_load(stringr)
#reading data


-foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
+foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
####### preprocess everything

nytitle <- nytitle %>% select(Date, text, label, score)
9 changes: 4 additions & 5 deletions DataAnalysis/Q3/trigramWordCloud.R
@@ -12,11 +12,10 @@ p_load(wordcloud)
p_load(tm)

####Loading all the data
-foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
-nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
-foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

+foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
trigram_wc <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=3) %>%
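trigram_wc is also truncated here, and the README below notes it didn't work as expected. A hedged sketch of a continuation in the same style as coocc_func — counting trigrams and feeding them to wordcloud — might be:

```r
# Hypothetical sketch; not the committed code.
trigram_wc <- function(foxnews){
  fox_unn <- foxnews %>%
    unnest_tokens(word, text, token = "ngrams", n = 3) %>%
    anti_join(stop_words)
  # count each trigram and plot the most frequent ones
  counts <- fox_unn %>% count(word, sort = TRUE)
  wordcloud(counts$word, counts$n, max.words = 50, random.order = FALSE)
}
```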
20 changes: 20 additions & 0 deletions DataAnalysis/READMESHEIKH.md
@@ -0,0 +1,20 @@
# How to handle my files ~ Sheikh

## Naming convention of the CSV files used here
> All the data files are kept in: ./SentimentAnalysis/data/q3/May30Scrap
1. Files containing the tweets themselves are imported as foxnews and nytimes. They contain all the tweets scraped with Nikita's query (replies included). In the folder these files are named foxalltweets and nytalltweets.
2. Files that ONLY contain tweets from the news sources themselves are imported as foxtitle and nytitle.

The reason for scraping the tweets in no. 2 is that, for question 1, I used them to find observations that are impossible to get from the data in no. 1, since those files are already filtered down to Ukraine-related tweets.
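Concretely, the mapping looks like this (paths written relative to the repo root here; the committed scripts use absolute Google Drive paths):

```r
foxnews  <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
nytimes  <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
foxtitle <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
nytitle  <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
```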

## Data Analysis
> All the R files used for data analysis are kept here: ./DataAnalysis/Q3
>> There is a folder called Figues, which contains some of the corresponding output figures; however, some of them were generated from old data, so they are better left alone. We can generate fresh figures instantly from our R files.

The names of the R files are self-explanatory. Still, a few points:
- trigramWordCloud.R doesn't work as expected, so we didn't use its output.
- Q3TitleAnalysis.R and NumberofWarRelatedTweets.R are the files that use the data scraped in no. 2.
- Q3SidebySideBar plots the overall tweet sentiment as a bar plot.
- For the word-cloud files: after running the entire script, call the cloud function from the console with the proper dataset and parameters to get the result (see the example below).
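For example, after sourcing MakeWordCloud.R (the sentiment labels in the data are "Positive", "Negative", and "Neutral"):

```r
# word cloud of positive-sentiment Fox News tweets
make_cloud(foxnews, "Positive")
```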



Binary file modified SentimentAnalysis/.DS_Store
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
Binary file modified SentimentAnalysis/data/.DS_Store
