
Commit 0901245: Fixed conflicts
Bensas committed May 26, 2022
2 parents 36a7e97 + baa18d0
Showing 19 changed files with 194 additions and 43 deletions.
Binary file modified DataAnalysis/Q1/Q1_sentiment_weekly.png
Binary file modified DataAnalysis/Q1/negativetweets_wordcloud.png
Binary file modified DataAnalysis/Q1/neutraltweets_wordcloud.png
Binary file modified DataAnalysis/Q1/positivetweets_wordcloud.png
2 changes: 1 addition & 1 deletion DataAnalysis/Q1/q1_analysis.R
@@ -52,7 +52,7 @@ ggplot(sentiment_by_week, aes(fill=label, y=freq, x=Week)) +
theme(axis.text.y = element_text(size=13)) +
theme(legend.title = element_text(face='bold', size=15)) +
theme(legend.text = element_text(size=15)) +
- scale_x_continuous(breaks=c(1,11,16,23), labels=c("Dec 24th", "Feb 24th", "April 7th", "May 24th")) +
+ scale_x_continuous(breaks=c(1,4,11,16,23), labels=c("Dec 24th", "Jan 14th", "Feb 24th", "April 7th", "May 24th")) +
scale_fill_manual('label', values=c('#d9534f', '#f0ad4e', '#5cb85c'))

# graph (by month)
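The hunk begins mid-call, so the construction of sentiment_by_week is not visible here. For orientation, a minimal dplyr sketch of how such a per-week frequency table could be assembled, assuming the tweet data carries Week and label columns (names inferred from the aes() mapping above; tweets is a hypothetical input frame, not the repository's actual code):

library(dplyr)

# Hypothetical input: one row per tweet, with a numeric Week index and a
# sentiment label ("Positive" / "Neutral" / "Negative").
sentiment_by_week <- tweets %>%
  count(Week, label, name = "n") %>%
  group_by(Week) %>%
  mutate(freq = n / sum(n)) %>%   # share of each sentiment within the week
  ungroup()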
12 changes: 6 additions & 6 deletions DataAnalysis/Q2/q2_before_after_russian_tweets.R
@@ -70,20 +70,20 @@ plot_barplot_before_after(q2_nato_russian, "Sentiment of russian tweets containi

# English

- q2_zelensky_english <- read.csv('UkraineConflictOnTwitter/SentimentAnalysis/data/q2/zelensky_english_with_sentiment.csv')
+ q2_zelensky_english <- read.csv('/Users/Bensas/ITBA/Intercambios/KAIST/Data\ Science\ Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q2/zelensky_english_with_sentiment.csv')
conduct_chisq_before_after(q2_zelensky_english)
plot_barplot_before_after(q2_zelensky_english, "Sentiment of english tweets containing \"Zelensky\"")
- # p-value for 1 week before/after = 7.735e-14 (significant)
+ # p-value for 1 week before/after = 2.153e-11 (significant)

- q2_putin_english <- read.csv('UkraineConflictOnTwitter/SentimentAnalysis/data/q2/putin_english_with_sentiment.csv')
+ q2_putin_english <- read.csv('/Users/Bensas/ITBA/Intercambios/KAIST/Data\ Science\ Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q2/putin_english_with_sentiment.csv')
conduct_chisq_before_after(q2_putin_english)
plot_barplot_before_after(q2_putin_english, "Sentiment of english tweets containing \"Putin\"")
- # p-value for 1 week before/after = 0.1959 (insignificant)
+ # p-value for 1 week before/after = 0.01354 (significant)

- q2_nato_english <- read.csv('UkraineConflictOnTwitter/SentimentAnalysis/data/q2/nato_english_with_sentiment.csv')
+ q2_nato_english <- read.csv('/Users/Bensas/ITBA/Intercambios/KAIST/Data\ Science\ Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q2/nato_english_with_sentiment.csv')
conduct_chisq_before_after(q2_nato_english)
plot_barplot_before_after(q2_nato_english, "Sentiment of english tweets containing \"NATO\"")
- # p-value for 1 week before/after = 0.1518 (insignificant)
+ # p-value for 1 week before/after = 0.04421 (significant)
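conduct_chisq_before_after and plot_barplot_before_after are defined earlier in this file and are not shown in the hunk. For orientation, a minimal sketch of what a before/after chi-squared comparison could look like; the invasion-date cut-off and the date/label column names are assumptions, not the repository's implementation:

# Compare the sentiment distribution one week before vs. one week after an event.
conduct_chisq_before_after_sketch <- function(df, event_date = as.Date("2022-02-24")) {
  df$date <- as.Date(df$date)
  before <- df[df$date >= event_date - 7 & df$date < event_date, ]
  after  <- df[df$date >= event_date & df$date < event_date + 7, ]
  sentiment_levels <- c("Negative", "Neutral", "Positive")
  counts <- rbind(
    Before = table(factor(before$label, levels = sentiment_levels)),
    After  = table(factor(after$label,  levels = sentiment_levels))
  )
  chisq.test(counts)  # small p-value => the two distributions differ
}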



58 changes: 58 additions & 0 deletions DataAnalysis/Q3/BigramWordCloud.R
@@ -0,0 +1,58 @@
library(pacman) #my package manager

#load necessary packages
p_load(ggplot2)
p_load(dplyr)
p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)
p_load(wordcloud)
p_load(tm)

####Loading all the data
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

bigram_wc <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=2) %>%
anti_join(stop_words)
bg_fox <- fox_unn %>%
separate(word, c("word1", "word2"), sep=" ")

avoid_list <- c("russia", "ukraine", "user", "http", "fox", "york")
filter_bg_fox <- bg_fox %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
filter(!word1 %in% avoid_list) %>%
filter(!word2 %in% avoid_list)

count_bg <- filter_bg_fox %>%
group_by(word1, word2) %>%
tally(sort = TRUE)

count_bg <- as.data.frame(count_bg)

count_bg$bigram <- paste(count_bg$word1, count_bg$word2, sep=" ")
wc <- wordcloud(words = count_bg$bigram, freq = count_bg$n, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))

return(wc)
}

fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

# Generate a bigram wordcloud for each sentiment subset
bigram_wc(fox_pos)
bigram_wc(fox_neg)
bigram_wc(fox_neu)
bigram_wc(nyt_pos)
bigram_wc(nyt_neg)
bigram_wc(nyt_neu)
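The commit also adds rendered images such as Bigram_Fox_Pos.png and Bigram_fox_neg.png; they were presumably produced by wrapping the calls above in a graphics device. A minimal sketch, with output size and file names chosen for illustration:

# Write one bigram wordcloud per subset to disk.
png("Bigram_Fox_Pos.png", width = 800, height = 800)
bigram_wc(fox_pos)
dev.off()

png("Bigram_fox_neg.png", width = 800, height = 800)
bigram_wc(fox_neg)
dev.off()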
Binary file added DataAnalysis/Q3/Bigram_Fox_Pos.png
Binary file added DataAnalysis/Q3/Bigram_fox_neg.png
48 changes: 48 additions & 0 deletions DataAnalysis/Q3/Cooccurance.R
@@ -0,0 +1,48 @@
library(pacman) #my package manager

#load necessary packages
p_load(ggplot2)
p_load(dplyr)
p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)

####Loading all the data
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

coocc_func <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=2) %>%
anti_join(stop_words)
bg_fox <- fox_unn %>%
separate(word, c("word1", "word2"), sep=" ")

avoid_list <- c("russia", "ukraine", "user", "http")
filter_bg_fox <- bg_fox %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
filter(!word1 %in% avoid_list) %>%
filter(!word2 %in% avoid_list)

count_bg <- filter_bg_fox %>%
group_by(word1, word2) %>%
tally(sort = TRUE)
return(count_bg)
}
fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

str(coocc_func(foxnews))  # inspect the structure of the bigram count table
df <- coocc_func(nytimes)
df$bigram <- paste(df$word1, df$word2, sep=" ")
head(df)
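The committed figures Fox_all_bigrams.png and NYTimes_all_bigrams_top.png suggest plots of the most frequent bigrams; the plotting code itself is not part of this commit. A minimal ggplot2 sketch that could produce such a chart from coocc_func()'s output (illustrative only):

library(dplyr)
library(ggplot2)

# Top 20 bigrams by count for the Fox News feed.
top_bigrams <- coocc_func(foxnews) %>%
  ungroup() %>%
  mutate(bigram = paste(word1, word2)) %>%
  slice_max(order_by = n, n = 20)

ggplot(top_bigrams, aes(x = reorder(bigram, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(x = NULL, y = "Bigram count")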
Binary file added DataAnalysis/Q3/Fox_all_bigrams.png
45 changes: 45 additions & 0 deletions DataAnalysis/Q3/MakeWordCloud.R
@@ -0,0 +1,45 @@
# Q3 wordcloud
library(pacman)
p_load(wordcloud)
p_load(tm)
p_load(dplyr)
p_load(ggplot2)

# source file -- change the file path here
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
foxtitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/FoxNews_Sheikh_with_sentiment.csv")
nytitle <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/NYT_Sheikh_with_sentiment.csv")

make_cloud <- function(dataset, sentiment){
positive <- dataset[dataset$label == sentiment,]

# remove non-ascii words
positive$text <- stringi::stri_trans_general(positive$text, "latin-ascii")
positive$text <- gsub("[^\x01-\x7F]", "", positive$text)

# create corpus and preprocess data
docs <- Corpus(VectorSource(positive$text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, removeWords, c("russia", "ukraine", "user", "http")) # remove "Russia" and "Ukraine"

# create matrix
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df <- data.frame(word = names(words),freq=words)

# create wordcloud
set.seed(1234)
wc <- wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words=200, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))
return(wc)

}

make_cloud(foxnews, "Neutral")
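A short usage sketch that renders one cloud per sentiment for both sources and writes each to its own PNG; the file-naming scheme is an assumption:

sources <- list(fox = foxnews, nyt = nytimes)
for (src_name in names(sources)) {
  for (sentiment in c("Positive", "Negative", "Neutral")) {
    png(sprintf("%s_%s_wordcloud.png", src_name, tolower(sentiment)),
        width = 800, height = 800)
    make_cloud(sources[[src_name]], sentiment)
    dev.off()
  }
}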
Binary file added DataAnalysis/Q3/NYTimes_all_bigrams_top.png
36 changes: 36 additions & 0 deletions DataAnalysis/Q3/Q3RAnalysisSheikh.R
@@ -7,6 +7,8 @@ p_load(dplyr)
p_load(reshape2)
p_load(gridExtra)
p_load(stringr)
p_load(tidytext)
p_load(tidyr)
#reading data
foxnews <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/fox_news_Final_with_sentiment.csv")
nytimes <- read.csv("/Volumes/GoogleDrive/My Drive/Spring 2022/Data Science Methodology/UkraineConflictOnTwitter/SentimentAnalysis/data/q3/new_york_times_Final_with_sentiment.csv")
@@ -130,4 +132,38 @@ ggplot(allSent, aes(x=Sentiment, y=Percentage, fill=Source)) +



############################################
# co-occurrence score

coocc_func <- function(foxnews){
fox_unn <- foxnews %>% unnest_tokens(word, text, token = "ngrams",
n=2) %>%
anti_join(stop_words)
bg_fox <- fox_unn %>%
separate(word, c("word1", "word2"), sep=" ")

avoid_list <- c("russia", "ukraine", "user", "http")
filter_bg_fox <- bg_fox %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
filter(!word1 %in% avoid_list) %>%
filter(!word2 %in% avoid_list)

count_bg <- filter_bg_fox %>%
group_by(word1, word2) %>%
tally(sort = TRUE)
return(count_bg)
}
fox_pos <- foxnews %>% filter(label=="Positive")
fox_neg <- foxnews %>% filter(label=="Negative")
fox_neu <- foxnews %>% filter(label=="Neutral")

nyt_pos <- nytimes %>% filter(label=="Positive")
nyt_neg <- nytimes %>% filter(label=="Negative")
nyt_neu <- nytimes %>% filter(label=="Neutral")

coocc_func(foxnews)
coocc_func(nytimes)
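One way to read the co-occurrence scores across outlets is to join the two count tables on the bigram; this comparison is an illustration and is not part of the committed analysis:

fox_bg <- coocc_func(foxnews) %>% ungroup() %>% rename(fox_n = n)
nyt_bg <- coocc_func(nytimes) %>% ungroup() %>% rename(nyt_n = n)

# Bigrams that appear in both feeds, ranked by combined frequency.
shared <- inner_join(fox_bg, nyt_bg, by = c("word1", "word2")) %>%
  mutate(total = fox_n + nyt_n) %>%
  arrange(desc(total))

head(shared, 20)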



36 changes: 0 additions & 36 deletions DataAnalysis/Q3/Q3WordCloudPosFoxNews.R

This file was deleted.

Binary file added DataAnalysis/Q3/bigram_Nytimes_positive.png
Binary file added DataAnalysis/Q3/bigram_fox_news_neutral.png
Binary file added DataAnalysis/Q3/bigram_nyt_negative.png
Binary file added DataAnalysis/Q3/bigram_nyt_neutral_.png
