
Commit

added a readme file for my work and reorganized files
sheikhshafayat committed Jun 4, 2022
1 parent 85c3ba9 commit 6f2bb53
Showing 39 changed files with 477,332 additions and 58 deletions.
Binary file modified .DS_Store
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
8 changes: 4 additions & 4 deletions DataAnalysis/Q3/MakeWordCloud.R
@@ -6,10 +6,10 @@ p_load(dplyr)
p_load(ggplot2)

# source file -- change the file path here
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")

make_cloud <- function(dataset, sentiment){
positive <- dataset[dataset$label == sentiment,]
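The rest of make_cloud is collapsed in this view. As a rough sketch only — the corpus-building and plotting steps below are assumptions, not the committed code — a continuation using the wordcloud and tm packages loaded in the sibling scripts might look like:

```r
# Hypothetical sketch of the collapsed body of make_cloud; not the committed code.
make_cloud <- function(dataset, sentiment){
  positive <- dataset[dataset$label == sentiment,]
  # build a corpus from the filtered tweets and strip English stop words
  corpus <- tm::Corpus(tm::VectorSource(positive$text))
  corpus <- tm::tm_map(corpus, tm::content_transformer(tolower))
  corpus <- tm::tm_map(corpus, tm::removeWords, tm::stopwords("en"))
  # rank terms by frequency and draw the cloud
  tdm <- tm::TermDocumentMatrix(corpus)
  freqs <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)
  wordcloud::wordcloud(names(freqs), freqs,
                       max.words = 100, random.order = FALSE)
}
```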
25 changes: 16 additions & 9 deletions DataAnalysis/Q3/Q3LinePlot.R
@@ -1,16 +1,16 @@
library(ggplot2)
library(dplyr)

+library(pacman)
+p_load(plotly)
# csv file -- change the file path here
-#q1 <- read.csv('/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q1/all_tweets_emotions_with_sentiment.csv')
-foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
-nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
-#foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-#nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
-q1 <- nytimes
+#foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+#nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+#foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
+q1 <- nytitle

#filter tweets that start with @
-q1 <- q1 %>% filter(!grepl("^@", q1$text))
+#q1 <- q1 %>% filter(!grepl("^@", q1$text))


## stacked bar plot ##
@@ -42,7 +42,7 @@ sentiment_by_month <- q1 %>%
arrange(YearMonth)

# graph (by week)
-ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week, col=label)) +
+p <- ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week, col=label)) +
geom_line(lwd=1.5) +
theme_minimal() +
theme(panel.background = element_blank()) +
@@ -57,4 +57,11 @@ ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week, col=label)) +
theme(legend.text = element_text(size=8)) +
scale_x_continuous(breaks=c(1,11,16,23), labels=c("Dec 24th", "Feb 24th", "April 7th", "May 24th"))
#scale_color_manual('label', values=c('#d9534f', '#f0ad4e', '#5cb85c', '#5cb86c', '#5cb87c', '#5cb88c', '#5cb89c'))
+ggplotly(p)
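The aggregation that builds sentiment_by_week is collapsed above. Judging from the columns the plot maps (Week, freq, label), a minimal dplyr sketch could look like this — the week-number computation is an assumption, not the committed code:

```r
# Hypothetical sketch of the weekly aggregation feeding the line plot.
# Assumes q1$Date is already a Date column; the week indexing is illustrative.
sentiment_by_week <- q1 %>%
  mutate(Week = as.integer(difftime(Date, min(Date, na.rm = TRUE),
                                    units = "weeks")) + 1) %>%
  group_by(Week, label) %>%
  summarise(freq = n(), .groups = "drop") %>%
  arrange(Week)
```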







@@ -11,13 +11,16 @@ p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)

#reading data
-foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
-nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
-foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

+foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
####### preprocess everything


nytimes <- nytimes %>% select(Date, text, label, score)
nytimes$Date <- sub(" .*", "", nytimes$Date) %>% as.Date(format="%Y-%m-%d", tz="UTC")
nytimes$text <- tolower(nytimes$text)
@@ -138,37 +141,3 @@ ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +


############################################
#co-occurrence score

coocc_func <- function(foxnews){
  # split the tweet text into bigrams
  fox_unn <- foxnews %>%
    unnest_tokens(word, text, token = "ngrams", n = 2) %>%
    anti_join(stop_words)
  # separate each bigram into its two component words
  bg_fox <- fox_unn %>%
    separate(word, c("word1", "word2"), sep = " ")

  # drop stop words and corpus-dominant terms from either position
  avoid_list <- c("russia", "ukraine", "user", "http")
  filter_bg_fox <- bg_fox %>%
    filter(!word1 %in% stop_words$word) %>%
    filter(!word2 %in% stop_words$word) %>%
    filter(!word1 %in% avoid_list) %>%
    filter(!word2 %in% avoid_list)

  # count each remaining word pair, most frequent first
  count_bg <- filter_bg_fox %>%
    group_by(word1, word2) %>%
    tally(sort = TRUE)
  return(count_bg)
}
fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

coocc_func(foxnews)
coocc_func(nytimes)
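The per-sentiment subsets defined above are never passed to coocc_func in this hunk; illustrative calls (not in the committed script) would look like:

```r
# Illustrative only: co-occurrence counts within one sentiment class
coocc_func(fox_pos)
coocc_func(nyt_neg)
```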



6 changes: 4 additions & 2 deletions DataAnalysis/Q3/Q3TitleAnalysis.R
@@ -9,8 +9,10 @@ p_load(stringr)
#reading data


-foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")
+foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
####### preprocess everything

nytitle <- nytitle %>% select(Date, text, label, score)
9 changes: 4 additions & 5 deletions DataAnalysis/Q3/trigramWordCloud.R
@@ -12,11 +12,10 @@ p_load(wordcloud)
p_load(tm)

####Loading all the data
-foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
-nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
-foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
-nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

+foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
+nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
+foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
+nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
trigram_wc <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=3) %>%
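trigram_wc is also truncated here, and the README below notes it didn't work as expected. A hedged sketch of a continuation in the same style as coocc_func — counting trigrams and feeding them to wordcloud — might be:

```r
# Hypothetical sketch; not the committed code.
trigram_wc <- function(foxnews){
  fox_unn <- foxnews %>%
    unnest_tokens(word, text, token = "ngrams", n = 3) %>%
    anti_join(stop_words)
  # count each trigram and plot the most frequent ones
  counts <- fox_unn %>% count(word, sort = TRUE)
  wordcloud(counts$word, counts$n, max.words = 50, random.order = FALSE)
}
```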
20 changes: 20 additions & 0 deletions DataAnalysis/READMESHEIKH.md
@@ -0,0 +1,20 @@
# How to handle my files ~ Sheikh

## Naming convention of the CSV files used here
> All the data files are kept in: ./SentimentAnalysis/data/q3/May30Scrap
1. Files containing the tweets themselves are imported as foxnews and nytimes. They contain all the tweets scraped with Nikita's query (replies included). In the folder these files are named foxalltweets and nytalltweets.
2. Files that ONLY contain tweets from the news sources themselves are imported as foxtitle and nytitle.

The reason for scraping the tweets in no. 2 is that, for question 1, I used them to find observations that are impossible to get from the data in no. 1, since those files are already filtered down to Ukraine-related tweets.
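Concretely, the mapping looks like this (paths written relative to the repo root here; the committed scripts use absolute Google Drive paths):

```r
foxnews  <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/foxalltweets_with_sentiment.csv")
nytimes  <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/nytalltweets_with_sentiment.csv")
foxtitle <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/foxtitle_with_sentiment.csv")
nytitle  <- read.csv("./SentimentAnalysis/data/q3/May30Scrap/nytitle_with_sentiment.csv")
```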

## Data Analysis
> All the R files used for data analysis are kept here: ./DataAnalysis/Q3
>> There is a folder called Figues, which contains some of the corresponding output figures; however, some of them were generated from old data, so they are better left alone. We can generate fresh figures instantly from our R files.

The names of the R files are self-explanatory. Still, a few points:
- trigramWordCloud.R doesn't work as expected, so we didn't use its output.
- Q3TitleAnalysis.R and NumberofWarRelatedTweets.R are the files that use the data scraped in no. 2.
- Q3SidebySideBar plots the overall tweet sentiment as a bar plot.
- For the word-cloud files: after running the entire script, call the cloud function from the console with the proper dataset and parameters to get the result (see the example below).
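For example, after sourcing MakeWordCloud.R (the sentiment labels in the data are "Positive", "Negative", and "Neutral"):

```r
# word cloud of positive-sentiment Fox News tweets
make_cloud(foxnews, "Positive")
```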



Binary file modified SentimentAnalysis/.DS_Store
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
Binary file modified SentimentAnalysis/data/.DS_Store
