From 119865fd1f1da98430d061a6d5a9b1be2d649531 Mon Sep 17 00:00:00 2001
From: Kapya <kapsakala@gmail.com>
Date: Mon, 13 Jan 2020 07:43:28 +0200
Subject: [PATCH 01/22] changed git commands to use namibia branch

---
 reporting/alldata.sh  | 4 ++--
 reporting/monthend.sh | 4 ++--
 upgrade               | 4 ++--
 upgrade_baseline.sh   | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/reporting/alldata.sh b/reporting/alldata.sh
index bd0bbcc..91f70c4 100755
--- a/reporting/alldata.sh
+++ b/reporting/alldata.sh
@@ -25,8 +25,8 @@ export exit_bold_mode=`tput rmso`
 
 #pull latest changes from master branch in repo
 cd ~/.scripts
-git reset --hard origin/zambia > /dev/null
-git pull origin zambia > /dev/null
+git reset --hard origin/namibia > /dev/null
+git pull origin namibia > /dev/null
 
 # Do silent upgrade of all scripts
 ./upgrade_silent.sh
diff --git a/reporting/monthend.sh b/reporting/monthend.sh
index 61b294d..015df3b 100755
--- a/reporting/monthend.sh
+++ b/reporting/monthend.sh
@@ -28,8 +28,8 @@ export exit_bold_mode=`tput rmso`
 
 #pull latest changes from master branch in repo
 cd ~/.scripts
-git reset --hard origin/zambia > /dev/null
-git pull origin zambia > /dev/null
+git reset --hard origin/namibia > /dev/null
+git pull origin namibia > /dev/null
 
 #Do silent upgrade of all scripts
 ./upgrade_silent.sh > /dev/null
diff --git a/upgrade b/upgrade
index 8998285..87ea9fc 100755
--- a/upgrade
+++ b/upgrade
@@ -1,6 +1,6 @@
 #!/bin/bash
 
 cd ~/.scripts
-git reset --hard origin/zambia
-git pull origin zambia
+git reset --hard origin/namibia
+git pull origin namibia
 ./setup.sh
diff --git a/upgrade_baseline.sh b/upgrade_baseline.sh
index 481246f..2985ebe 100755
--- a/upgrade_baseline.sh
+++ b/upgrade_baseline.sh
@@ -32,7 +32,7 @@ if [ "$?" = "0" ]; then
 	echo "${blue}Baseline Testing already configured${reset}"
 	cd ~/.baseline_testing/
 	echo "Pulling latest changes to Baseline system..."
-	git pull origin zambia > /dev/null
+	git pull origin namibia > /dev/null
 
 	#make script executable if it isnt
 	chmod +x ~/.baseline_testing/scripts/setup.sh
@@ -43,7 +43,7 @@ else
 	echo "${blue}Cloning repository...${reset}"
 	git clone https://github.com/techZM/offline_testing.git .baseline_testing > /dev/null
 	cd ~/.baseline_testing/
-	git checkout zambia
+	git checkout namibia
 
 	#make script executable if it isnt
 	chmod +x ~/.baseline_testing/scripts/setup.sh

From 5aba0372c8364a989aaecf682454e74d2a57042a Mon Sep 17 00:00:00 2001
From: techZM <techzm@edulution.org>
Date: Wed, 15 Jan 2020 22:53:41 +0200
Subject: [PATCH 02/22] add kolibri stop to shutdown alias

---
 .bash_aliases | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.bash_aliases b/.bash_aliases
index 005eade..00eeaec 100755
--- a/.bash_aliases
+++ b/.bash_aliases
@@ -1,4 +1,4 @@
-alias shutdown='~/.scripts/backupdb/backup.sh;sudo shutdown -h now'
+alias shutdown='kolibri stop;~/.scripts/backupdb/backup.sh;sudo shutdown -h now'
 alias reboot='sudo reboot'
 alias whoru='cd ~/.scripts/identify/;python identify.py'
 alias alldata='~/.scripts/reporting/alldata.sh'

From 8c6addd681fb81589e9dff6cfbb02b07aaae8b26 Mon Sep 17 00:00:00 2001
From: L3Vyt <47414785+L3Vyt@users.noreply.github.com>
Date: Wed, 29 Jan 2020 18:14:03 +0200
Subject: [PATCH 03/22] Update .bash_aliases

---
 .bash_aliases | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.bash_aliases b/.bash_aliases
index 00eeaec..e83a3da 100755
--- a/.bash_aliases
+++ b/.bash_aliases
@@ -16,5 +16,5 @@ alias restartbl='chmod +x ~/.baseline_testing/scripts/startup_script;~/.baseline
 alias getkousers='chmod +x ~/.baseline_testing/scripts/start_users_extraction.sh;~/.baseline_testing/scripts/start_users_extraction.sh'
 
 # assign live learners to the right groups
-alias assign_learners='~/.kolibri_helper_scripts/assign_learners.sh'
+alias assign_learners='~/.kolibri_helper_scripts/assign_learners.sh;chmod +x ~/.baseline_testing/scripts/startup_script;~/.baseline_testing/scripts/startup_script'
 

From 8a21094c302eb96d79df01edadfae473731e60bc Mon Sep 17 00:00:00 2001
From: Kapya <kapsakala@gmail.com>
Date: Tue, 4 Aug 2020 15:44:51 +0200
Subject: [PATCH 04/22] use paste instead of paste0

---
 reporting/preproc_tables.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R
index 17e691b..c89f9a9 100755
--- a/reporting/preproc_tables.R
+++ b/reporting/preproc_tables.R
@@ -56,10 +56,10 @@ if(nrow(users) == 0){
 channel_metadata <<- channel_metadata %>%
   left_join(channel_module, by=c("id" = "channel_id")) %>%
   # create new column with module and abbreviated playlist name
-  mutate(abbr_name = paste(module,'_',abbreviate(name))) %>%
+  mutate(abbr_name = paste0(module,'_',abbreviate(name))) %>%
   # create new column with abbr name and the word progress
   # will be used as the column name for channel progress in final report
-  mutate(abbr_name_progress = paste(abbr_name,'_progress'))
+  mutate(abbr_name_progress = paste0(abbr_name,'_progress'))
 
 #create named vector with channel_ids and abbreviated playlist names
 course_name_id <- unlist(channel_metadata$abbr_name)

From 07fad8180c9cc3a4af7a8c6d8a3f623e7bf76ccb Mon Sep 17 00:00:00 2001
From: Kapya <kapsakala@gmail.com>
Date: Thu, 8 Oct 2020 11:05:45 +0200
Subject: [PATCH 05/22] ignore csv files

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 992e600..7a83080 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 backupdb/*.log
 crazyhours.txt
+*.csv
 .Rproj.user
 config.yml
 *.Rproj

From b6899d91eb9b84be89b8a305a38d9925a06db21a Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Mon, 15 Feb 2021 15:52:22 +0200
Subject: [PATCH 06/22] changed to namibia branch

---
 config/flyway_bl.sh | 4 ++--
 upgrade_silent.sh   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/config/flyway_bl.sh b/config/flyway_bl.sh
index 6a21289..5e3af31 100755
--- a/config/flyway_bl.sh
+++ b/config/flyway_bl.sh
@@ -9,7 +9,7 @@ function flyway_bl(){
 	# Switch to baseline testing directory
 	cd $BL_DIR || exit
 	# Reset the code to match the remote branch
-	git reset --hard origin/develop
+	git reset --hard origin/namibia
 
 	# path to migrations for baseline testing database
 	BL_SQL_PATH=$BL_DIR/migrations
@@ -25,7 +25,7 @@ function flyway_bl(){
 	sudo flyway -locations="filesystem:$BL_SQL_PATH" -url=jdbc:postgresql://$BASELINE_DATABASE_HOST:$BASELINE_DATABASE_PORT/$BASELINE_DATABASE_NAME -user=$BASELINE_DATABASE_USER  -password=$BASELINE_DATABASE_PASSWORD "$1"
 
 	# Reset the code to match the remote branch once again
-	git reset --hard origin/develop
+	git reset --hard origin/namibia
 	# switch to the home directory or exit
 	cd || exit
 }
diff --git a/upgrade_silent.sh b/upgrade_silent.sh
index 8fe98b3..e7fe445 100755
--- a/upgrade_silent.sh
+++ b/upgrade_silent.sh
@@ -43,7 +43,7 @@ fi
 
 # Check if kolibri helper scripts directory exists. pull it if it does not
 if [ -d "$kolibri_helper_scripts_dir" ]; then
-	cd $kolibri_helper_scripts_dir && git reset --hard origin/develop && git pull origin develop && cd ~ || exit
+	cd $kolibri_helper_scripts_dir && git reset --hard origin/namibia && git pull origin namibia && cd ~ || exit
 else
 	echo "Helper scripts directory does not exist. Cloning now..."
 	git clone https://github.com/edulution/kolibri_helper_scripts.git $kolibri_helper_scripts_dir

From facff5223fe9e3cd74258a81241e618f5496c0c3 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Thu, 14 Oct 2021 10:37:32 +0200
Subject: [PATCH 07/22] delete learners on provided list when silent upgrade is
 triggered

---
 upgrade_silent.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/upgrade_silent.sh b/upgrade_silent.sh
index f7f4cac..aa7a4f4 100755
--- a/upgrade_silent.sh
+++ b/upgrade_silent.sh
@@ -60,4 +60,7 @@ fi
 
 # Add any other scripts you would like to run below this line
 ###################
-~/.scripts/config/add_pgtune_settings.sh
\ No newline at end of file
+~/.scripts/config/add_pgtune_settings.sh
+
+# Delete learners on provided list
+python ~/.kolibri_helper_scripts/delete_learners.py -f ~/.kolibri_helper_scripts/NA_deletion_1021.csv
\ No newline at end of file

From f1944e0f6ca22fa2464888739439bf55ed19f97a Mon Sep 17 00:00:00 2001
From: Ntipac <ntch98@gmail.com>
Date: Mon, 8 Nov 2021 11:48:22 +0200
Subject: [PATCH 08/22] Updated script to stop deleting any restored learners

---
 upgrade_silent.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/upgrade_silent.sh b/upgrade_silent.sh
index aa7a4f4..f75ce03 100755
--- a/upgrade_silent.sh
+++ b/upgrade_silent.sh
@@ -62,5 +62,3 @@ fi
 ###################
 ~/.scripts/config/add_pgtune_settings.sh
 
-# Delete learners on provided list
-python ~/.kolibri_helper_scripts/delete_learners.py -f ~/.kolibri_helper_scripts/NA_deletion_1021.csv
\ No newline at end of file

From c955477e5a06b49bfcf8e0cb996f8552802c5fd3 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Mon, 15 Nov 2021 16:45:09 +0200
Subject: [PATCH 09/22] delete any loose csv files in the reports directory
 before beginning report extraction

---
 reporting/monthend.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/reporting/monthend.sh b/reporting/monthend.sh
index 3bf841b..97caea4 100755
--- a/reporting/monthend.sh
+++ b/reporting/monthend.sh
@@ -34,6 +34,10 @@ if [[ "$psql_running" == 'Running' ]];then
 	   sudo service nginx stop > /dev/null
        echo "${GREEN}Extracting data for month $1${RESET}"
 
+       # Delete any loose csv files in the reports directory before extraction
+       cd ~/.reports
+       find . -type f \( -name "*.csv" \) -exec rm {} \;
+
        echo Beginning report extraction.....
        # fetch the first argument given on the command line and use it as an argument to the Rscript
        cd ~/.scripts/reporting || exit

From 9735c1e2e0d61f53e8d602b6dc6e5ed0817226dc Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Fri, 11 Feb 2022 13:51:05 +0200
Subject: [PATCH 10/22] change timezone to Windhoek on last login

---
 reporting/preproc_tables.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R
index 2ecda2c..cfeb73d 100755
--- a/reporting/preproc_tables.R
+++ b/reporting/preproc_tables.R
@@ -47,7 +47,7 @@ if (nrow(users) == 0) {
     dplyr::rename(centre = name) %>%
     # Convert the last login to the nearest timezone for the centre location
     dplyr::mutate(
-      last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Lusaka")
+      last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Windhoek")
     ) %>%
     dplyr::select(
       id,

From f7550d72e65a7e67bcee259176400ff7c59363ea Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Tue, 1 Mar 2022 16:32:40 +0200
Subject: [PATCH 11/22] added script to reinstall dbhelpers package without
 upgrading other packages

(cherry picked from commit 56a13feac2e29d9575a3621ef10115085f942cbf)
---
 config/reinstall_dbhelpers_package.sh | 1 +
 1 file changed, 1 insertion(+)
 create mode 100755 config/reinstall_dbhelpers_package.sh

diff --git a/config/reinstall_dbhelpers_package.sh b/config/reinstall_dbhelpers_package.sh
new file mode 100755
index 0000000..0885761
--- /dev/null
+++ b/config/reinstall_dbhelpers_package.sh
@@ -0,0 +1 @@
+sudo su - -c "R -e \"devtools::install_github('Aypak/dbhelpers', upgrade = 'never')\""
\ No newline at end of file

From f1b18b628ef4bac61c277fb53e61576f0b9b8d10 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Tue, 1 Mar 2022 16:36:12 +0200
Subject: [PATCH 12/22] added dbhelpers reinstall to silent upgrade

(cherry picked from commit 1f12a128645a25da369553dfed2725a495d92e5f)
---
 upgrade_silent.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/upgrade_silent.sh b/upgrade_silent.sh
index 37261dc..9793845 100755
--- a/upgrade_silent.sh
+++ b/upgrade_silent.sh
@@ -63,4 +63,7 @@ fi
 ~/.scripts/config/add_pgtune_settings.sh
 
 # Do channel subscriptions 
-~/.kolibri_helper_scripts/channel_setup/insert_channel_subscriptions.sh
\ No newline at end of file
+~/.kolibri_helper_scripts/channel_setup/insert_channel_subscriptions.sh
+
+# Reinstall dbhelpers package
+~/.scripts/config/reinstall_dbhelpers_package.sh
\ No newline at end of file

From 89b42181b41dceb5662287652713b35e7a79a131 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Tue, 1 Mar 2022 16:44:06 +0200
Subject: [PATCH 13/22] added shebang to script

(cherry picked from commit 5498cd78d1d67b5073e6e94b15eb5f7ea3ecbb54)
---
 config/reinstall_dbhelpers_package.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/reinstall_dbhelpers_package.sh b/config/reinstall_dbhelpers_package.sh
index 0885761..c7ce963 100755
--- a/config/reinstall_dbhelpers_package.sh
+++ b/config/reinstall_dbhelpers_package.sh
@@ -1 +1,2 @@
+#!/bin/bash
 sudo su - -c "R -e \"devtools::install_github('Aypak/dbhelpers', upgrade = 'never')\""
\ No newline at end of file

From 4ec73e216f9e6dc12ec14d359df0f79c12d251f6 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Wed, 9 Mar 2022 11:32:35 +0200
Subject: [PATCH 14/22] got class_name from memberships and collections. added
 class_name column to users df

(cherry picked from commit f3cebfbb56d5fce74117f81e4326a803ec643df6)
---
 reporting/preproc_tables.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R
index cfeb73d..f02925f 100755
--- a/reporting/preproc_tables.R
+++ b/reporting/preproc_tables.R
@@ -29,6 +29,18 @@ learners_and_groups <<- memberships %>%
   dplyr::distinct(user_id, .keep_all = TRUE) %>%
   dplyr::select(name, user_id)
 
+# Get learners and the classrooms (grades) they belong to
+learners_and_grades <- memberships %>%
+  # filter out memberships of type learnergroup
+  dplyr::filter(kind == "classroom") %>%
+  dplyr::group_by(user_id) %>%
+  # If a learner belongs to multiple classes, separate them with commas
+  dplyr::mutate(name = paste(name, collapse = ",") %>% stringr::str_trim()) %>%
+  dplyr::ungroup() %>%
+  dplyr::distinct(user_id, .keep_all = T) %>%
+  dplyr::select("class_name" = name, user_id)
+
+
 # filter out admins and coaches to get list of users
 # select only the relevant columns
 users <<- facilityusers %>%
@@ -45,6 +57,7 @@ if (nrow(users) == 0) {
     # then drop the facility_id column
     dplyr::left_join(facilities, by = c("facility_id" = "id")) %>%
     dplyr::rename(centre = name) %>%
+    dplyr::left_join(learners_and_grades, by = c("id"= "user_id")) %>% 
     # Convert the last login to the nearest timezone for the centre location
     dplyr::mutate(
       last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Windhoek")
@@ -55,6 +68,7 @@ if (nrow(users) == 0) {
       username,
       date_joined,
       last_login,
+      class_name,
       centre,
       facility_id
     ) %>%

From 89e95d6e70b8807891dd6b9a0b5f984d8701151e Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Wed, 9 Mar 2022 11:32:56 +0200
Subject: [PATCH 15/22] add class_name to final report extract

(cherry picked from commit 46aacd90fb02f3faceea6ac8afad1e0f796cb2d5)
---
 reporting/monthend.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/reporting/monthend.R b/reporting/monthend.R
index 62ac4d4..60797f0 100755
--- a/reporting/monthend.R
+++ b/reporting/monthend.R
@@ -123,6 +123,7 @@ monthend <- function(dates, sessionlogs, summarylogs, topics, device_name, inclu
       first_name,
       last_name,
       username,
+      class_name,
       centre,
       total_hours,
       total_exercises,

From e9f60d3f14fb1292a43e60c2895fd08a6555fff1 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Wed, 9 Mar 2022 14:21:41 +0200
Subject: [PATCH 16/22] defensive calls to dplyr and tidyr functions

(cherry picked from commit 018a3ff8a23ddacc74fc3cf3ea4764e7c3ad7e3a)
---
 reporting/preproc_topics.R | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/reporting/preproc_topics.R b/reporting/preproc_topics.R
index 2bc3fd0..592e431 100755
--- a/reporting/preproc_topics.R
+++ b/reporting/preproc_topics.R
@@ -12,17 +12,17 @@ get_topics <- function(contentnodes, channelmetadata) {
   # Filter content nodes of type topic
   # Where id not equal to channel_id (if id = channel_id and kind = topic, it is actually a channel)
   topics <- contentnodes %>%
-    filter(
+    dplyr::filter(
       kind == "topic",
       id != channel_id
     ) %>%
-    rename(
+    dplyr::rename(
       # Rename the content_id to topic_id
       topic_id = id,
       # Rename the title of the node to topic_title
       topic_title = title
     ) %>%
-    select(
+    dplyr::select(
       # Select the topic_id, topic_title, and channel_id
       topic_id,
       topic_title,
@@ -30,24 +30,24 @@ get_topics <- function(contentnodes, channelmetadata) {
     ) %>%
     # Create a column called channel_topic
     # consists of channel_id and topic_id separated by underscore
-    mutate(channel_topic = paste0(channel_id, "_", topic_id))
+    dplyr::mutate(channel_topic = paste0(channel_id, "_", topic_id))
 
   # Join contentnodes and channelmetadata to topics
   topics_full <- contentnodes %>%
-    left_join(
+    dplyr::left_join(
       topics,
       c("parent_id" = "topic_id", "channel_id")
     ) %>%
-    left_join(
+    dplyr::left_join(
       # Join to channel_metadata
       channel_metadata,
       c("channel_id" = "id")
     ) %>%
-    rename(
+    dplyr::rename(
       # Rename the name column from channel_meta to channel_name
       channel_name = name
     ) %>%
-    select(
+    dplyr::select(
       content_id,
       channel_id,
       content_title = title,
@@ -69,12 +69,12 @@ get_topics <- function(contentnodes, channelmetadata) {
 #'
 get_topic_nodes_count <- function(topics) {
   topic_nodes_count <- topics %>%
-    filter(
+    dplyr::filter(
       kind != "topic",
       content_id != channel_id
     ) %>%
-    count(channel_id, topic_id, kind, name = "nodes_count") %>%
-    unite(
+    dplyr::count(channel_id, topic_id, kind, name = "nodes_count") %>%
+    tidyr::unite(
       "channel_topic_kind",
       c("channel_id", "topic_id", "kind"),
       sep = "_",
@@ -85,4 +85,4 @@ get_topic_nodes_count <- function(topics) {
 # Call the functions above and save the results to variables
 # channel contents and channel_metadata expected to be in global environment
 topics <- get_topics(channel_contents, channel_metadata)
-topic_nodes_count <- get_topic_nodes_count(topics)
\ No newline at end of file
+topic_nodes_count <- get_topic_nodes_count(topics)

From df6dbf7f706515a3f949b917c781842570bd476d Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Wed, 16 Mar 2022 13:43:41 +0200
Subject: [PATCH 17/22] added new alias to regenerate lessons and quizzes

(cherry picked from commit dba7428cd31ef64e37228799669e5bc9e2c75350)
---
 config/.bash_aliases | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/config/.bash_aliases b/config/.bash_aliases
index 6cf8fff..634cac1 100755
--- a/config/.bash_aliases
+++ b/config/.bash_aliases
@@ -25,6 +25,9 @@ alias flyway_bl='~/.scripts/config/flyway_bl.sh'
 # assign live learners to the right groups
 alias assign_learners='~/.kolibri_helper_scripts/assign_learners.sh;~/.baseline_testing/scripts/startup_script'
 
+# Regenerate lessons and quizzes, then assign learners and restart both kolibri and baseline
+alias make_quiz='kolibri stop;python ~/.kolibri_helper_scripts/setup.py;~/.kolibri_helper_scripts/assign_learners.sh;~/.scripts/restart_kolibri.sh;~/.baseline_testing/scripts/startup_script'
+
 #Import all Kolibri channels from the internet
 alias update_channels='~/.kolibri_helper_scripts/channel_setup/import_channels_network.sh'
 

From f1ecaa58ebf2100171867613a0306e2f7ef3d9cd Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Fri, 15 Jul 2022 17:17:42 +0200
Subject: [PATCH 18/22] changed timezone to match  country

---
 reporting/get_db_tables.R     |   5 +
 reporting/monthend.R          | 342 +++++++--------
 reporting/preproc_tables.R    | 205 ++++-----
 reporting/process_dateinput.R |  70 ++--
 reporting/transforms.R        | 760 ++++++++++++++++++----------------
 5 files changed, 722 insertions(+), 660 deletions(-)

diff --git a/reporting/get_db_tables.R b/reporting/get_db_tables.R
index b100642..e1c7452 100755
--- a/reporting/get_db_tables.R
+++ b/reporting/get_db_tables.R
@@ -63,6 +63,11 @@ content_sessionlogs <<- conn %>%
   dplyr::tbl("logger_contentsessionlog") %>%
   dplyr::collect()
 
+# user sessionlogs
+user_sessionlogs <<- conn %>%
+  dplyr::tbl("logger_usersessionlog") %>%
+  dplyr::collect()
+
 # get channel content
 channel_contents <<- conn %>%
   dplyr::tbl("content_contentnode") %>%
diff --git a/reporting/monthend.R b/reporting/monthend.R
index 60797f0..fb297b9 100755
--- a/reporting/monthend.R
+++ b/reporting/monthend.R
@@ -1,167 +1,175 @@
-# Clear work space
-rm(list = ls())
-
-# Load libraries
-suppressMessages(library(timeDate))
-suppressMessages(library(tidyr))
-suppressMessages(library(plyr))
-suppressMessages(library(dplyr))
-suppressMessages(library(tools))
-suppressMessages(library(gsubfn))
-suppressMessages(library(stringr))
-suppressMessages(library(rebus))
-suppressMessages(library(dbhelpers))
-
-# Source helper functions and prerequiste data into global scope
-source("helpers.R")
-source("get_db_tables.R")
-source("preproc_tables.R")
-source("preproc_topics.R")
-source("check_completed_ex_vid_count.R")
-source("check_sessionlogs.R")
-source("transforms.R")
-source("process_dateinput.R")
-
-# Prevent displaying warning messages from script on console(errors will still show)
-options(warn = -1)
-
-#' Function to get data extract only for month that user inputs
-#'
-#' @param dates A named vector derived from the \code{process_dateinput} function, containing the start and end dates for data extraction
-#' @param sessionlogs A \code{data.frame} containing ContentSessionLogs from Kolibri
-#' @param summarylogs A \code{data.frame} containing ContentSummaryLogs from Kolibri
-#' @param topics A \code{data.frame} containing ContentNodes of kind topic from Kolibri
-#' @param device_name A vector containing the device name, derived from Collections in Kolibri
-#' @param include_coach_content A \code{boolean} representing whether or not to include activity on ContentNodes flagged as coach content in the data extraction. Default value FALSE
-#'
-#' @return A \code{data.frame} containing activity data from between the start and end dates
-#'
-monthend <- function(dates, sessionlogs, summarylogs, topics, device_name, include_coach_content = FALSE) {
-  # Get the dates needed from the dates vector supplied
-  year_month <- dates$year_month
-  month_start <- dates$month_start
-  month_end <- dates$month_end
-
-  # Add start_date_only to session logs
-  if (isTRUE(include_coach_content)) {
-    sessionlogs <- sessionlogs %>%
-      dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d"))
-  } else {
-    sessionlogs <- sessionlogs %>%
-      dplyr::filter(!content_id %in% coach_content$id) %>%
-      dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d"))
-
-    summarylogs <- summarylogs %>%
-      dplyr::filter(!content_id %in% coach_content$id)
-  }
-
-
-  # Get total time spent by each user between month start and month end
-  time_spent_by_user <- get_time_spent_by_user(sessionlogs, month_start, month_end)
-
-  # Get the number of distinct days a user logeed in using the start_timestamp date only
-  logins_by_user <- get_logins_by_user(sessionlogs, month_start, month_end)
-
-  # Get the total number of completed exercises and videos between month start and month end
-  completed_ex_vid_count <- get_completed_ex_vid_count(sessionlogs, month_start, month_end)
-
-  # get total time spent by channel
-  time_by_channel <- get_time_by_channel(sessionlogs, month_start, month_end)
-
-  # Get exercises and videos completed for each channel
-  ex_vid_by_channel <- get_ex_vid_by_channel(sessionlogs, month_start, month_end)
-
-  # get total_progress by channel_id for all time
-  prog_by_user_by_channel <- get_prog_by_user_by_channel(sessionlogs)
-
-  # Exercises and videos completed by topic for the dates supplied
-  month_summary_exvid_by_topic <- get_month_summary_exvid_by_topic(
-    sessionlogs,
-    topics,
-    month_start,
-    month_end
-  )
-
-  # Time spent by topic for the dates supplied
-  month_summary_time_by_topic <- get_month_summary_time_by_topic(
-    sessionlogs,
-    topics,
-    month_start,
-    month_end
-  )
-
-  # Join all of the transformations together by user_id to make a complete report
-  rpt <- users %>%
-    dplyr::left_join(time_spent_by_user, by = c("id" = "user_id")) %>%
-    dplyr::left_join(completed_ex_vid_count, by = c("id" = "user_id")) %>%
-    dplyr::left_join(logins_by_user, by = c("id" = "user_id")) %>%
-    dplyr::left_join(time_by_channel, by = c("id" = "user_id")) %>%
-    dplyr::left_join(prog_by_user_by_channel, by = c("id" = "user_id")) %>%
-    dplyr::left_join(ex_vid_by_channel, by = c("id" = "user_id")) %>%
-    dplyr::left_join(month_summary_exvid_by_topic, by = c("id" = "user_id")) %>%
-    dplyr::left_join(month_summary_time_by_topic, by = c("id" = "user_id")) %>%
-    # Add new columns
-    dplyr::mutate(
-      month_active = ifelse(total_hours > 0, 1, 0),
-      module = rep("numeracy"),
-      # Set total exercises and total videos to 0 if total hours is 0
-      total_exercises = replace(total_exercises, total_hours == 0, 0),
-      total_videos = replace(total_videos, total_hours == 0, 0),
-      # Derive the first name and last name columns using helper functions
-      first_name = dbhelpers::get_first_name(full_name),
-      last_name = dbhelpers::get_last_name(full_name),
-      # Format the month end column into a string in the form YYYY-MM-DD
-      month_end = rep(strftime(month_end, "%Y-%m-%d"))
-    )
-
-  # Convert id column from uuid to character string
-  rpt <- rpt %>%
-    dplyr::mutate(id = str_replace_all(id, "-", "")) %>%
-    # Reorder columns. put familiar columns first
-    dplyr::select(
-      id,
-      first_name,
-      last_name,
-      username,
-      class_name,
-      centre,
-      total_hours,
-      total_exercises,
-      total_videos,
-      month_end,
-      last_login,
-      month_active,
-      module,
-      total_logins,
-      everything()
-    )
-
-  # Write report to csv
-  write.csv(
-    rpt,
-    file = generate_filename("monthend_", year_month, device_name),
-    col.names = FALSE,
-    row.names = FALSE,
-    na = "0"
-  )
-  system("echo Report extracted successfully!")
-  quit(save = "no")
-}
-
-# Get user input from the command-line
-input <- commandArgs(TRUE)
-
-# Process the user input and get a vector of dates
-dates_vec <- process_dateinput(input)
-
-# Check if content logs exist between the month start and month end
-check_sessionlogs(content_sessionlogs, dates_vec, device_name)
-
-# Extract the month end report
-monthend(
-  dates = dates_vec,
-  sessionlogs = content_sessionlogs,
-  summarylogs = content_summarylogs,
-  topics = topics,
-  device_name = device_name
-)
\ No newline at end of file
+# Clear work space
+rm(list = ls())
+
+# Load libraries
+suppressMessages(library(timeDate))
+suppressMessages(library(tidyr))
+suppressMessages(library(plyr))
+suppressMessages(library(dplyr))
+suppressMessages(library(tools))
+suppressMessages(library(gsubfn))
+suppressMessages(library(stringr))
+suppressMessages(library(rebus))
+suppressMessages(library(dbhelpers))
+
+# Source helper functions and prerequiste data into global scope
+source("helpers.R")
+source("get_db_tables.R")
+source("preproc_tables.R")
+source("preproc_topics.R")
+source("check_completed_ex_vid_count.R")
+source("check_sessionlogs.R")
+source("transforms.R")
+source("process_dateinput.R")
+
+# Prevent displaying warning messages from script on console(errors will still show)
+options(warn = -1)
+
+#' Function to get data extract only for month that user inputs
+#'
+#' @param dates A named vector derived from the \code{process_dateinput} function, containing the start and end dates for data extraction
+#' @param sessionlogs A \code{data.frame} containing ContentSessionLogs from Kolibri
+#' @param usersessionlogs A \code{data.frame} containing UserSessionLogs from Kolibri
+#' @param summarylogs A \code{data.frame} containing ContentSummaryLogs from Kolibri
+#' @param topics A \code{data.frame} containing ContentNodes of kind topic from Kolibri
+#' @param device_name A vector containing the device name, derived from Collections in Kolibri
+#' @param include_coach_content A \code{boolean} representing whether or not to include activity on ContentNodes flagged as coach content in the data extraction. Default value FALSE
+#'
+#' @return A \code{data.frame} containing activity data from between the start and end dates
+#'
+monthend <- function(dates, usersessionlogs, sessionlogs, summarylogs, topics, device_name, include_coach_content = FALSE) {
+  # Get the dates needed from the dates vector supplied
+  year_month <- dates$year_month
+  month_start <- dates$month_start
+  month_end <- dates$month_end
+
+  # Add start_date_only to session logs
+  if (isTRUE(include_coach_content)) {
+    sessionlogs <- sessionlogs %>%
+      dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d"))
+  } else {
+    sessionlogs <- sessionlogs %>%
+      dplyr::filter(!content_id %in% coach_content$id) %>%
+      dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d"))
+
+    summarylogs <- summarylogs %>%
+      dplyr::filter(!content_id %in% coach_content$id)
+  }
+
+
+  # Get total time spent by each user between month start and month end
+  time_spent_by_user <- get_time_spent_by_user(sessionlogs, month_start, month_end)
+
+  # Get the number of distinct days a user logeed in using the start_timestamp date only
+  logins_by_user <- get_logins_by_user(usersessionlogs, month_start, month_end)
+
+  # Get the total number of completed exercises and videos between month start and month end
+  completed_ex_vid_count <- get_completed_ex_vid_count(summarylogs, month_start, month_end)
+
+  # get total time spent by channel
+  time_by_channel <- get_time_by_channel(sessionlogs, month_start, month_end)
+
+  # Get exercises and videos completed for each channel
+  ex_vid_by_channel <- get_ex_vid_by_channel(summarylogs, month_start, month_end)
+
+  # Get the summary progress by topic and content type
+  ex_vid_progress_by_topic <- get_summary_act_by_topic(summarylogs, topics, topic_nodes_count)
+
+  # get total_progress by channel_id for all time
+  prog_by_user_by_channel <- get_prog_by_user_by_channel(sessionlogs)
+
+  # Exercises and videos completed by topic for the dates supplied
+  month_summary_exvid_by_topic <- get_month_summary_exvid_by_topic(
+    summarylogs,
+    topics,
+    month_start,
+    month_end
+  )
+
+  # Time spent by topic for the dates supplied
+  month_summary_time_by_topic <- get_month_summary_time_by_topic(
+    sessionlogs,
+    topics,
+    month_start,
+    month_end
+  )
+
+  # Join all of the transformations together by user_id to make a complete report
+  rpt <- users %>%
+    dplyr::left_join(time_spent_by_user, by = c("id" = "user_id")) %>%
+    dplyr::left_join(completed_ex_vid_count, by = c("id" = "user_id")) %>%
+    dplyr::left_join(logins_by_user, by = c("id" = "user_id")) %>%
+    dplyr::left_join(time_by_channel, by = c("id" = "user_id")) %>%
+    dplyr::left_join(prog_by_user_by_channel, by = c("id" = "user_id")) %>%
+    dplyr::left_join(ex_vid_by_channel, by = c("id" = "user_id")) %>%
+    dplyr::left_join(month_summary_exvid_by_topic, by = c("id" = "user_id")) %>%
+    dplyr::left_join(month_summary_time_by_topic, by = c("id" = "user_id")) %>%
+    dplyr::left_join(ex_vid_progress_by_topic, by = c("id" = "user_id")) %>%
+    # Add new columns
+    dplyr::mutate(
+      month_active = ifelse(total_hours > 0, 1, 0),
+      module = rep("numeracy"),
+      # Set total exercises and total videos to 0 if total hours is 0
+      total_exercises = replace(total_exercises, total_hours == 0, 0),
+      total_videos = replace(total_videos, total_hours == 0, 0),
+      # Derive the first name and last name columns using helper functions
+      first_name = dbhelpers::get_first_name(full_name),
+      last_name = dbhelpers::get_last_name(full_name),
+      # Format the month end column into a string in the form YYYY-MM-DD
+      month_end = rep(strftime(month_end, "%Y-%m-%d"))
+    )
+
+  # Convert id column from uuid to character string
+  rpt <- rpt %>%
+    dplyr::mutate(id = str_replace_all(id, "-", "")) %>%
+    # Reorder columns. put familiar columns first
+    dplyr::select(
+      id,
+      first_name,
+      last_name,
+      username,
+      class_name,
+      centre,
+      total_hours,
+      total_exercises,
+      total_videos,
+      month_end,
+      last_login,
+      month_active,
+      module,
+      total_logins,
+      everything()
+    ) %>%
+    # Replace NAs in numeric columns with 0
+    mutate_if(is.numeric, replace_na, replace = 0)
+
+  # Write report to csv
+  write.csv(
+    rpt,
+    file = generate_filename("monthend_", year_month, device_name),
+    col.names = FALSE,
+    row.names = FALSE,
+    na = "0"
+  )
+  system("echo Report extracted successfully!")
+  quit(save = "no")
+}
+
+# Get user input from the command-line
+input <- commandArgs(TRUE)
+
+# Process the user input and get a vector of dates
+dates_vec <- process_dateinput(input)
+
+# Check if content logs exist between the month start and month end
+check_sessionlogs(content_sessionlogs, dates_vec, device_name)
+
+# Extract the month end report
+monthend(
+  dates = dates_vec,
+  usersessionlogs = user_sessionlogs,
+  sessionlogs = content_sessionlogs,
+  summarylogs = content_summarylogs,
+  topics = topics,
+  device_name = device_name
+)
diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R
index f02925f..6004c95 100755
--- a/reporting/preproc_tables.R
+++ b/reporting/preproc_tables.R
@@ -1,101 +1,104 @@
-suppressMessages(library(plyr))
-suppressMessages(library(dplyr))
-suppressMessages(library(lubridate))
-
-# get the default facility id and from it get the device name(facility name)
-default_facility_id <<- default_facility_id %>%
-  dplyr::pull(default_facility_id)
-
-# get a df of all of the facilities on the device
-facilities <<- collections %>% dplyr::filter(kind == "facility")
-
-# keep the default facility as the device name (will be used to name the output file)
-device_name <<- collections %>%
-  dplyr::filter(id == default_facility_id) %>%
-  # Get only the first 5 characters of the name
-  dplyr::mutate(name = str_sub(name, 1, 5)) %>%
-  # Select only the name column
-  dplyr::select(name)
-
-# join collections to memberships. (used for getting user groups)
-memberships <<- memberships %>%
-  dplyr::left_join(collections,
-    by = c("collection_id" = "id")
-  )
-
-# get dataframe containing learners and groups they belong to
-learners_and_groups <<- memberships %>%
-  dplyr::filter(kind == "learnergroup") %>%
-  dplyr::distinct(user_id, .keep_all = TRUE) %>%
-  dplyr::select(name, user_id)
-
-# Get learners and the classrooms (grades) they belong to
-learners_and_grades <- memberships %>%
-  # filter out memberships of type learnergroup
-  dplyr::filter(kind == "classroom") %>%
-  dplyr::group_by(user_id) %>%
-  # If a learner belongs to multiple classes, separate them with commas
-  dplyr::mutate(name = paste(name, collapse = ",") %>% stringr::str_trim()) %>%
-  dplyr::ungroup() %>%
-  dplyr::distinct(user_id, .keep_all = T) %>%
-  dplyr::select("class_name" = name, user_id)
-
-
-# filter out admins and coaches to get list of users
-# select only the relevant columns
-users <<- facilityusers %>%
-  dplyr::filter(!id %in% roles$user_id)
-
-if (nrow(users) == 0) {
-  # If there are no users on the device, stop the program and inform the user
-  stop("No users found. Nothing to extract")
-} else {
-  # select the relevant columns from the users df
-  users <<- users %>%
-    # join users to facilities,
-    # rename the facility name to centre
-    # then drop the facility_id column
-    dplyr::left_join(facilities, by = c("facility_id" = "id")) %>%
-    dplyr::rename(centre = name) %>%
-    dplyr::left_join(learners_and_grades, by = c("id"= "user_id")) %>% 
-    # Convert the last login to the nearest timezone for the centre location
-    dplyr::mutate(
-      last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Windhoek")
-    ) %>%
-    dplyr::select(
-      id,
-      full_name,
-      username,
-      date_joined,
-      last_login,
-      class_name,
-      centre,
-      facility_id
-    ) %>%
-    dplyr::select(-facility_id)
-}
-
-# join channel metadata to channel_module
-channel_metadata <<- channel_metadata %>%
-  dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>%
-  # create new column with module and abbreviated playlist name
-  dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>%
-  # create new column with abbr name and the word progress
-  # will be used as the column name for channel progress in final report
-  dplyr::mutate(abbr_name_progress = paste0(abbr_name, "_progress"))
-
-# create named vector with channel_ids and abbreviated playlist names
-course_name_id <- unlist(channel_metadata$abbr_name)
-names(course_name_id) <- unlist(channel_metadata$id)
-
-# create named vector with abbr_name_progress and make the channel ids the names of each of the elements
-course_name_id_progress <-
-  unlist(channel_metadata$abbr_name_progress)
-names(course_name_id_progress) <- unlist(channel_metadata$id)
-
-# get number of content items by channel.used to compute overall progress in channel
-num_contents_by_channel <<- channel_contents %>%
-  dplyr::filter(!kind %in% c("topic", "channel")) %>%
-  dplyr::count(channel_id, name = "total_items")
-
-coach_content <<- channel_contents %>% dplyr::filter(coach_content == TRUE)
+suppressMessages(library(plyr))
+suppressMessages(library(dplyr))
+suppressMessages(library(lubridate))
+
+centre_timezone <- "Africa/Windhoek"
+
+# get the default facility id and from it get the device name(facility name)
+default_facility_id <<- default_facility_id %>%
+  dplyr::pull(default_facility_id)
+
+# get a df of all of the facilities on the device
+facilities <<- collections %>% dplyr::filter(kind == "facility")
+
+# keep the default facility as the device name (will be used to name the output file)
+device_name <<- collections %>%
+  dplyr::filter(id == default_facility_id) %>%
+  # Get only the first 5 characters of the name
+  dplyr::mutate(name = str_sub(name, 1, 5)) %>%
+  # Select only the name column
+  dplyr::select(name)
+
+# join collections to memberships. (used for getting user groups)
+memberships <<- memberships %>%
+  dplyr::left_join(collections,
+    by = c("collection_id" = "id")
+  )
+
+# get dataframe containing learners and groups they belong to
+learners_and_groups <<- memberships %>%
+  dplyr::filter(kind == "learnergroup") %>%
+  dplyr::distinct(user_id, .keep_all = TRUE) %>%
+  dplyr::select(name, user_id)
+
+# Get learners and the classrooms (grades) they belong to
+learners_and_grades <- memberships %>%
+  # keep only memberships of kind classroom (learnergroups etc. are dropped)
+  dplyr::filter(kind == "classroom") %>%
+  dplyr::group_by(user_id) %>%
+  # If a learner belongs to multiple classes, separate them with commas
+  dplyr::mutate(name = paste(name, collapse = ",") %>% stringr::str_trim()) %>%
+  dplyr::ungroup() %>%
+  dplyr::distinct(user_id, .keep_all = T) %>%
+  dplyr::select("class_name" = name, user_id)
+
+
+# filter out admins and coaches to get list of users
+# select only the relevant columns
+users <<- facilityusers %>%
+  dplyr::filter(!id %in% roles$user_id)
+
+if (nrow(users) == 0) {
+  # If there are no users on the device, stop the program and inform the user
+  stop("No users found. Nothing to extract")
+} else {
+  # select the relevant columns from the users df
+  users <<- users %>%
+    # join users to facilities,
+    # rename the facility name to centre
+    # then drop the facility_id column
+    dplyr::left_join(facilities, by = c("facility_id" = "id")) %>%
+    dplyr::rename(centre = name) %>%
+    dplyr::left_join(learners_and_grades, by = c("id" = "user_id")) %>%
+    # Parse last_login and convert it to the centre's timezone (centre_timezone)
+    dplyr::mutate(
+      last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz(centre_timezone)
+    ) %>%
+    dplyr::select(
+      id,
+      full_name,
+      username,
+      date_joined,
+      last_login,
+      class_name,
+      centre,
+      facility_id
+    ) %>%
+    dplyr::select(-facility_id)
+}
+
+# join channel metadata to channel_module
+channel_metadata <<- channel_metadata %>%
+  dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>%
+  # create new column with module and abbreviated playlist name
+  dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>%
+  # create new column with abbr name and the word progress
+  # will be used as the column name for channel progress in final report
+  dplyr::mutate(abbr_name_progress = paste0(abbr_name, "_progress"))
+
+# create named vector with channel_ids and abbreviated playlist names
+course_name_id <- unlist(channel_metadata$abbr_name)
+names(course_name_id) <- unlist(channel_metadata$id)
+
+# create named vector with abbr_name_progress and make the channel ids the names of each of the elements
+course_name_id_progress <-
+  unlist(channel_metadata$abbr_name_progress)
+
+names(course_name_id_progress) <- unlist(channel_metadata$id)
+
+# get number of content items by channel.used to compute overall progress in channel
+num_contents_by_channel <<- channel_contents %>%
+  dplyr::filter(!kind %in% c("topic", "channel")) %>%
+  dplyr::count(channel_id, name = "total_items")
+
+coach_content <<- channel_contents %>% dplyr::filter(coach_content == TRUE)
diff --git a/reporting/process_dateinput.R b/reporting/process_dateinput.R
index 14eab71..ea92d58 100755
--- a/reporting/process_dateinput.R
+++ b/reporting/process_dateinput.R
@@ -1,36 +1,36 @@
-#' Turn the user date input into a named vector
-#' containing year_month, month_start and month_end
-#'
-#' @param dateinput
-#'
-#' @return
-#' @export
-#'
-#' @examples
-process_dateinput <- function(dateinput) {
-  year_month <- dateinput
-
-  # With user input from the command line, create complete date by prefixing with 01
-  upper_limit <- paste("01-", year_month, sep = "")
-
-  # Check if the user input is a valid month and year, and in the form mm-yy
-  check_date_valid(upper_limit)
-
-  # Get month start and month end as correctly formatted strings
-  month_end <- as.Date(
-    timeLastDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d")
-  )
-
-  month_start <- as.Date(
-    timeFirstDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d")
-  )
-
-  # return a named vector containing year_month, month_start and month_end
-  dates_vec <- list(
-    year_month = year_month,
-    month_start = month_start,
-    month_end = month_end
-  )
-
-  return(dates_vec)
+#' Turn the user date input into a named list
+#' containing year_month, month_start and month_end
+#'
+#' @param dateinput Month and year from the user (the check below is documented as expecting mm-yy)
+#'
+#' @return A list with year_month (as given) and month_start / month_end as Dates
+#' @export
+#'
+#' @examples
+process_dateinput <- function(dateinput) {
+  year_month <- dateinput
+
+  # With user input from the command line, create complete date by prefixing with 01
+  upper_limit <- paste("01-", year_month, sep = "")
+
+  # Check if the user input is a valid month and year, and in the form mm-yy
+  check_date_valid(upper_limit)
+
+  # Get the last and first day of that month, converted to Date objects
+  month_end <- as.Date(
+    timeLastDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d")
+  )
+
+  month_start <- as.Date(
+    timeFirstDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d")
+  )
+
+  # return a named list containing year_month, month_start and month_end
+  dates_vec <- list(
+    year_month = year_month,
+    month_start = as.Date(month_start),
+    month_end = as.Date(month_end)
+  )
+
+  return(dates_vec)
 }
\ No newline at end of file
diff --git a/reporting/transforms.R b/reporting/transforms.R
index bac56d5..062a5ee 100755
--- a/reporting/transforms.R
+++ b/reporting/transforms.R
@@ -1,357 +1,403 @@
-#' Get total time spent by each user between month start and month end
-#'
-#' @param sessionlogs A dataframe of contentsessionlogs
-#' @param lower_lim
-#' @param upper_lim
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_time_spent_by_user <- function(sessionlogs, lower_lim, upper_lim) {
-  time_spent_by_user <- sessionlogs %>%
-    dplyr::filter(
-      start_timestamp >= lower_lim,
-      end_timestamp <= upper_lim
-    ) %>%
-    dplyr::group_by(user_id) %>%
-    dplyr::summarize(total_hours = sum(time_spent) / 3600)
-
-  print(paste(
-    "Sucessfully retrieved time spent by user between",
-    lower_lim,
-    "and",
-    upper_lim
-  ))
-
-  return(time_spent_by_user)
-}
-
-#' Get the number of distinct days a user logged in using the start_timestamp date only
-#'
-#' @param sessionlogs
-#' @param lower_lim
-#' @param upper_lim
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_logins_by_user <- function(sessionlogs, lower_lim, upper_lim) {
-  logins_by_user <- sessionlogs %>%
-    dplyr::filter(
-      start_timestamp >= lower_lim,
-      end_timestamp <= upper_lim
-    ) %>%
-    dplyr::distinct(user_id, start_date_only) %>%
-    dplyr::count(user_id, name = "total_logins")
-
-  print(paste(
-    "Sucessfully retrieved logins by user between",
-    lower_lim,
-    "and",
-    upper_lim
-  ))
-
-  return(logins_by_user)
-}
-
-
-#' Get the total number of completed exercises and videos between month start and month end
-#'
-#' @param sessionlogs
-#' @param lower_lim
-#' @param upper_lim
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_completed_ex_vid_count <- function(sessionlogs, lower_lim, upper_lim) {
-  completed_ex_vid_count <- sessionlogs %>%
-    dplyr::filter(
-      start_timestamp >= lower_lim,
-      end_timestamp <= upper_lim,
-      progress >= 0.99
-    ) %>%
-    dplyr::count(user_id, kind, name = "count") %>%
-    check_completed_ex_vid_count()
-
-  print(paste(
-    "Sucessfully retrieved exercises and videos completed by user between",
-    lower_lim,
-    "and",
-    upper_lim
-  ))
-
-  return(completed_ex_vid_count)
-}
-
-
-#' Get total time spent by channel
-#'
-#' @param sessionlogs
-#' @param lower_lim
-#' @param upper_lim
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_time_by_channel <- function(sessionlogs, lower_lim, upper_lim) {
-  time_by_channel <- sessionlogs %>%
-    dplyr::filter(
-      start_timestamp >= lower_lim,
-      end_timestamp <= upper_lim
-    ) %>%
-    dplyr::group_by(user_id, channel_id) %>%
-    dplyr::summarise(total_time = sum(time_spent) / 3600) %>%
-    tidyr::pivot_wider(names_from = channel_id, values_from = total_time) %>%
-    dplyr::rename_at(
-      vars(-user_id),
-      function(x) {
-        paste0(x, "_playlist_timespent")
-      }
-    ) %>%
-    dplyr::ungroup()
-
-  print(paste(
-    "Sucessfully retrieved total time by channel by user between",
-    lower_lim,
-    "and",
-    upper_lim
-  ))
-
-
-  return(time_by_channel)
-  # result of above is a data frame
-  # time spent on each channel as a separate column with the channel id as the column name
-
-  # change column names which are channel_ids from channel_ids to readable course names
-  # using the named vector created outside the function
-  # names(time_by_channel) <- c("user_id",recode(names(time_by_channel)[-1],!!!course_name_id))
-}
-
-
-#' Get exercises and videos completed for each channel
-#'
-#' @param sessionlogs
-#' @param lower_lim
-#' @param upper_lim
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_ex_vid_by_channel <- function(sessionlogs, lower_lim, upper_lim) {
-  ex_vid_by_channel <- sessionlogs %>%
-    dplyr::filter(
-      start_timestamp >= lower_lim,
-      end_timestamp <= upper_lim,
-      progress >= 0.99
-    ) %>%
-    dplyr::group_by(user_id, channel_id) %>%
-    dplyr::count(user_id, kind, name = "count") %>%
-    tidyr::unite("act_channel", c(channel_id, kind)) %>%
-    tidyr::pivot_wider(names_from = act_channel, values_from = count) %>%
-    dplyr::rename_at(
-      vars(-user_id),
-      function(x) {
-        str_replace(x, "_exercise", "_playlist_exercise") %>%
-          str_replace("_video", "_playlist_video") %>%
-          str_replace("_document", "_playlist_document")
-      }
-    ) %>%
-    dplyr::ungroup()
-
-  print(paste(
-    "Sucessfully retrieved exercises and videos by channel by user between",
-    lower_lim,
-    "and",
-    upper_lim
-  ))
-
-
-  return(ex_vid_by_channel)
-}
-
-
-#' Get total_progress by channel_id for all time
-#'
-#' @param sessionlogs
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_prog_by_user_by_channel <- function(sessionlogs) {
-  prog_by_user_by_channel <- sessionlogs %>%
-    dplyr::group_by(user_id, channel_id, content_id) %>%
-    dplyr::summarise(max_prog = max(progress)) %>%
-    dplyr::group_by(user_id, channel_id) %>%
-    dplyr::summarise(total_prog = sum(max_prog)) %>%
-    # join total prog by channel to number of items by channel
-    # used to get percent progress in channel
-    dplyr::left_join(num_contents_by_channel) %>%
-    # create a column for the percent progress by channel
-    dplyr::mutate(pct_progress = total_prog / total_items) %>%
-    # get rid of the columns for total prog and total_items
-    # turn the progress for each channel into a separate column
-    dplyr::select(-c(total_prog, total_items)) %>%
-    tidyr::pivot_wider(names_from = channel_id, values_from = pct_progress) %>%
-    dplyr::rename_at(
-      vars(-user_id),
-      function(x) {
-        paste0(x, "_playlist_progress")
-      }
-    ) %>%
-    dplyr::ungroup()
-
-  print("Sucessfully retrieved summary channel progress by user")
-
-  return(prog_by_user_by_channel)
-}
-
-
-#' Summary timespent and progress by topic and content kind for all time
-#'
-#' @param summarylogs
-#' @param topics
-#' @param topic_nodes_count
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_summary_act_by_topic <- function(summarylogs, topics, topic_nodes_count) {
-  summary_act_by_topic <- summarylogs %>%
-    dplyr::left_join(
-      topics,
-      by = c("content_id", "channel_id", "kind")
-    ) %>%
-    tidyr::unite(
-      "topic_act_type",
-      c("channel_id", "topic_id", "kind"),
-      sep = "_"
-    ) %>%
-    dplyr::group_by(user_id, topic_act_type) %>%
-    dplyr::summarise(
-      topic_act_timespent = sum(time_spent),
-      topic_act_totalprog = sum(progress)
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::left_join(
-      topic_nodes_count,
-      by = c("topic_act_type" = "channel_topic_kind")
-    ) %>%
-    dplyr::mutate(
-      topic_act_progpct = topic_act_totalprog / nodes_count
-    ) %>%
-    # Only get user_id, topic_act_type and progpct
-    dplyr::select(
-      user_id,
-      topic_act_type,
-      topic_act_progpct
-    ) %>%
-    replace_na(list(topic_act_progpct = 0L))
-
-  print("Sucessfully retrieved summary activity by topic")
-
-  return(summary_act_by_topic)
-}
-
-
-
-#' Get summary of time spent by topic for each user
-#'
-#' @param sessionlogs
-#' @param topics
-#' @param lower_lim
-#' @param upper_lim
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_month_summary_time_by_topic <- function(sessionlogs, topics, lower_lim, upper_lim) {
-  month_summary_time_by_topic <- sessionlogs %>%
-    dplyr::filter(
-      start_timestamp >= lower_lim,
-      end_timestamp <= upper_lim
-    ) %>%
-    dplyr::left_join(
-      topics,
-      by = c("content_id", "channel_id", "kind")
-    ) %>%
-    tidyr::unite(
-      "topic_act_type",
-      c("channel_id", "topic_id", "kind"),
-      sep = "_"
-    ) %>%
-    dplyr::group_by(user_id, topic_act_type) %>%
-    dplyr::summarise(
-      topic_act_timespent = sum(time_spent) / 3600
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::mutate(topic_act_type = str_c(
-      # Add the word time spent to topic_act_type
-      topic_act_type, "timespent"
-    )) %>%
-    tidyr::pivot_wider(names_from = topic_act_type, values_from = topic_act_timespent)
-
-  print(paste(
-    "Sucessfully retrieved summary_progress by user between",
-    lower_lim,
-    "and",
-    upper_lim
-  ))
-
-  return(month_summary_time_by_topic)
-}
-
-
-
-#' Get summary of exercises done and videos watched by each user
-#'
-#' @param sessionlogs
-#' @param topics
-#' @param lower_lim
-#' @param upper_lim
-#'
-#' @return
-#' @export
-#'
-#' @examples
-get_month_summary_exvid_by_topic <- function(sessionlogs, topics, lower_lim, upper_lim) {
-  month_summary_exvid_by_topic <- sessionlogs %>%
-    dplyr::filter(
-      start_timestamp >= lower_lim,
-      end_timestamp <= upper_lim,
-      progress >= 0.99
-    ) %>%
-    dplyr::left_join(
-      topics,
-      by = c("content_id", "channel_id", "kind")
-    ) %>%
-    tidyr::unite(
-      "topic_act_type",
-      c("channel_id", "topic_id", "kind"),
-      sep = "_"
-    ) %>%
-    dplyr::count(user_id, topic_act_type, name = "num_completed") %>%
-    dplyr::ungroup() %>%
-    dplyr::mutate(topic_act_type = str_c(
-      # Add the word completed to topic_act_type
-      topic_act_type, "completed"
-    )) %>%
-    tidyr::pivot_wider(names_from = topic_act_type, values_from = num_completed)
-
-  print(paste(
-    "Sucessfully retrieved summary exercises and videos by topic by user between",
-    lower_lim,
-    "and",
-    upper_lim
-  ))
-
-  return(month_summary_exvid_by_topic)
-}
\ No newline at end of file
+#' Get total time spent by each user between month start and month end
+#'
+#' @param sessionlogs A \code{data.frame} of ContentSessionlogs
+#' @param lower_lim Lower bound of date range (sessions must start and end on or after it)
+#' @param upper_lim Upper bound of date range (sessions must start and end on or before it)
+#'
+#' @return A \code{data.frame} with one row per user_id and total_hours (sum of time_spent / 3600)
+#' @export
+#'
+#' @examples
+get_time_spent_by_user <- function(sessionlogs, lower_lim, upper_lim) {
+  time_spent_by_user <- sessionlogs %>%
+    dplyr::mutate(
+      start_timestamp = as.Date(start_timestamp),
+      end_timestamp = as.Date(end_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(start_timestamp, lower_lim, upper_lim),
+      between(end_timestamp, lower_lim, upper_lim)
+    ) %>%
+    dplyr::group_by(user_id) %>%
+    dplyr::summarize(total_hours = sum(time_spent) / 3600)
+
+  print(paste(
+    "Sucessfully retrieved time spent by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+  return(time_spent_by_user)
+}
+
+#' Get the number of distinct days a user logged in using the start_timestamp date only
+#'
+#' @param usersessionlogs A \code{data.frame} of UserSessionlogs
+#' @param lower_lim Lower bound of date range (applied to start and last interaction dates)
+#' @param upper_lim Upper bound of date range (applied to start and last interaction dates)
+#'
+#' @return A \code{data.frame} with one row per user_id and a total_logins column
+#' @export
+#'
+#' @examples
+get_logins_by_user <- function(usersessionlogs, lower_lim, upper_lim) {
+  logins_by_user <- usersessionlogs %>%
+    dplyr::mutate(
+      start_timestamp = as.Date(start_timestamp),
+      last_interaction_timestamp = as.Date(last_interaction_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(start_timestamp, lower_lim, upper_lim),
+      between(last_interaction_timestamp, lower_lim, upper_lim)
+    ) %>%
+    mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d")) %>%
+    dplyr::distinct(user_id, start_date_only) %>%
+    dplyr::count(user_id, name = "total_logins")
+
+  print(paste(
+    "Sucessfully retrieved logins by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+  return(logins_by_user)
+}
+
+
+#' Get the total number of completed exercises and videos between month start and month end
+#'
+#' @param summarylogs A \code{data.frame} of ContentSummarylogs
+#' @param lower_lim Lower bound of date range, compared against the completion date
+#' @param upper_lim Upper bound of date range, compared against the completion date
+#'
+#' @return The per-user, per-kind counts as returned by check_completed_ex_vid_count()
+#' @export
+#'
+#' @examples
+get_completed_ex_vid_count <- function(summarylogs, lower_lim, upper_lim) {
+  completed_ex_vid_count <- summarylogs %>%
+    dplyr::mutate(
+      completion_timestamp = as.Date(completion_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(completion_timestamp, lower_lim, upper_lim)
+    ) %>%
+    dplyr::count(user_id, kind, name = "count") %>%
+    check_completed_ex_vid_count()
+
+  print(paste(
+    "Sucessfully retrieved exercises and videos completed by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+  return(completed_ex_vid_count)
+}
+
+
+#' Get number of unique attempted exercises and videos per user in a date range
+#'
+#' @param sessionlogs A \code{data.frame} of the ContentSessionlogs
+#' @param lower_lim Lower bound of date range (sessions must start and end on or after it)
+#' @param upper_lim Upper bound of date range (sessions must start and end on or before it)
+#'
+#' @return Per-user, per-kind counts of distinct content items, via check_completed_ex_vid_count()
+#' @export
+#'
+get_attempted_ex_vid_count <- function(sessionlogs, lower_lim, upper_lim) {
+  attempted_ex_vid_count <- sessionlogs %>%
+    dplyr::mutate(
+      start_timestamp = as.Date(start_timestamp),
+      end_timestamp = as.Date(end_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(start_timestamp, lower_lim, upper_lim),
+      between(end_timestamp, lower_lim, upper_lim)
+    ) %>%
+    dplyr::distinct(user_id, content_id, .keep_all = TRUE) %>% # unique per user, not globally
+    dplyr::count(user_id, kind, name = "count") %>%
+    check_completed_ex_vid_count()
+
+  print(paste(
+    "Sucessfully retrieved exercises and videos attempted by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+  return(attempted_ex_vid_count)
+}
+
+#' Get total time spent by channel
+#'
+#' @param sessionlogs A \code{data.frame} of ContentSessionlogs
+#' @param lower_lim Lower bound of date range (sessions must start and end on or after it)
+#' @param upper_lim Upper bound of date range (sessions must start and end on or before it)
+#'
+#' @return A wide \code{data.frame}: one row per user_id, one <channel_id>_playlist_timespent column per channel
+#' @export
+#'
+#' @examples
+get_time_by_channel <- function(sessionlogs, lower_lim, upper_lim) {
+  time_by_channel <- sessionlogs %>%
+    dplyr::mutate(
+      start_timestamp = as.Date(start_timestamp),
+      end_timestamp = as.Date(end_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(start_timestamp, lower_lim, upper_lim),
+      between(end_timestamp, lower_lim, upper_lim)
+    ) %>%
+    dplyr::group_by(user_id, channel_id) %>%
+    dplyr::summarise(total_time = sum(time_spent) / 3600) %>%
+    tidyr::pivot_wider(
+      names_from = channel_id,
+      names_glue = "{channel_id}_playlist_timespent",
+      values_from = total_time,
+      values_fill = 0
+    ) %>%
+    dplyr::ungroup()
+
+  print(paste(
+    "Sucessfully retrieved total time by channel by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+
+  return(time_by_channel)
+  # result of above is a data frame
+  # time spent on each channel as a separate column with the channel id as the column name
+
+  # (disabled) change column names which are channel_ids from channel_ids to readable course names
+  # using the named vector created outside the function
+  # names(time_by_channel) <- c("user_id",recode(names(time_by_channel)[-1],!!!course_name_id))
+}
+
+
+#' Get exercises and videos completed for each channel
+#'
+#' @param summarylogs A \code{data.frame} of ContentSummarylogs
+#' @param lower_lim Lower bound of date range, compared against the completion date
+#' @param upper_lim Upper bound of date range, compared against the completion date
+#'
+#' @return A wide \code{data.frame}: one row per user_id, one <channel_id>_playlist_<kind> count column
+#' @export
+#'
+#' @examples
+get_ex_vid_by_channel <- function(summarylogs, lower_lim, upper_lim) {
+  ex_vid_by_channel <- summarylogs %>%
+    dplyr::mutate(
+      completion_timestamp = as.Date(completion_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(completion_timestamp, lower_lim, upper_lim)
+    ) %>%
+    dplyr::count(user_id, channel_id, kind, name = "count") %>%
+    tidyr::pivot_wider(
+      names_from = c(channel_id, kind),
+      names_glue = "{channel_id}_playlist_{kind}",
+      values_from = count,
+      values_fill = 0
+    )
+
+  print(paste(
+    "Sucessfully retrieved exercises and videos by channel by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+
+  return(ex_vid_by_channel)
+}
+
+
+#' Get total_progress by channel_id for all time
+#'
+#' @param sessionlogs A \code{data.frame} of ContentSessionlogs
+#'
+#' @return A wide \code{data.frame}: one row per user_id, one <channel_id>_playlist_progress column per channel
+#' @export
+#'
+#' @examples
+get_prog_by_user_by_channel <- function(sessionlogs) {
+  prog_by_user_by_channel <- sessionlogs %>%
+    dplyr::group_by(user_id, channel_id, content_id) %>%
+    dplyr::summarise(max_prog = max(progress)) %>%
+    dplyr::group_by(user_id, channel_id) %>%
+    dplyr::summarise(total_prog = sum(max_prog)) %>%
+    # join total prog by channel to number of items by channel (no `by`: shared columns are the keys)
+    # NOTE: num_contents_by_channel is not a parameter; it must exist outside this function
+    dplyr::left_join(num_contents_by_channel) %>%
+    # create a column for the percent progress by channel
+    dplyr::mutate(pct_progress = total_prog / total_items) %>%
+    # get rid of the columns for total prog and total_items
+    # turn the progress for each channel into a separate column
+    dplyr::select(-c(total_prog, total_items)) %>%
+    tidyr::pivot_wider(
+      names_from = channel_id,
+      names_glue = "{channel_id}_playlist_progress",
+      values_from = pct_progress,
+      values_fill = 0
+    ) %>%
+    dplyr::ungroup()
+
+  print("Sucessfully retrieved summary channel progress by user")
+
+  return(prog_by_user_by_channel)
+}
+
+
+#' Summary progress by topic and content kind for all time (time spent is summed but not returned)
+#'
+#' @param summarylogs A \code{data.frame} of ContentSummarylogs
+#' @param topics A \code{data.frame} of the topics, from the get_topics \code{function}
+#' @param topic_nodes_count A \code{data.frame} of the topic_nodes_count, from the get_topic_nodes_count \code{function}
+#'
+#' @return A wide \code{data.frame}: one row per user_id, one progress-ratio column per channel_topic_kind
+#' @export
+#'
+#' @examples
+get_summary_act_by_topic <- function(summarylogs, topics, topic_nodes_count) {
+  summary_act_by_topic <- summarylogs %>%
+    dplyr::left_join(
+      topics,
+      by = c("content_id", "channel_id", "kind")
+    ) %>%
+    tidyr::unite(
+      "topic_act_type",
+      c("channel_id", "topic_id", "kind"),
+      sep = "_"
+    ) %>%
+    dplyr::group_by(user_id, topic_act_type) %>%
+    dplyr::summarise(
+      topic_act_timespent = sum(time_spent),
+      topic_act_totalprog = sum(progress)
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::left_join(
+      topic_nodes_count,
+      by = c("topic_act_type" = "channel_topic_kind")
+    ) %>%
+    dplyr::mutate(
+      topic_act_progpct = topic_act_totalprog / nodes_count
+    ) %>%
+    # Only keep user_id, topic_act_type and progpct (topic_act_timespent is dropped here)
+    dplyr::select(
+      user_id,
+      topic_act_type,
+      topic_act_progpct
+    ) %>%
+    tidyr::pivot_wider(names_from = topic_act_type, values_from = topic_act_progpct, values_fill = 0)
+
+  print("Sucessfully retrieved summary activity by topic")
+
+  return(summary_act_by_topic)
+}
+
+
+
+#' Get summary of time spent by topic for each user
+#'
+#' @param sessionlogs A \code{data.frame} of ContentSessionlogs
+#' @param topics A \code{data.frame} of the topics, from the get_topics \code{function}
+#' @param lower_lim Lower bound of date range (sessions must start and end on or after it)
+#' @param upper_lim Upper bound of date range (sessions must start and end on or before it)
+#'
+#' @return A wide \code{data.frame}: one row per user_id, one <channel>_<topic>_<kind>timespent column
+#' @export
+#'
+#' @examples
+get_month_summary_time_by_topic <- function(sessionlogs, topics, lower_lim, upper_lim) {
+  month_summary_time_by_topic <- sessionlogs %>%
+    dplyr::mutate(
+      start_timestamp = as.Date(start_timestamp),
+      end_timestamp = as.Date(end_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(start_timestamp, lower_lim, upper_lim),
+      between(end_timestamp, lower_lim, upper_lim)
+    ) %>%
+    dplyr::left_join(
+      topics,
+      by = c("content_id", "channel_id", "kind")
+    ) %>%
+    tidyr::unite(
+      "topic_act_type",
+      c("channel_id", "topic_id", "kind"),
+      sep = "_"
+    ) %>%
+    dplyr::group_by(user_id, topic_act_type) %>%
+    dplyr::summarise(
+      topic_act_timespent = sum(time_spent) / 3600
+    ) %>%
+    dplyr::ungroup() %>%
+    tidyr::pivot_wider(
+      names_from = topic_act_type,
+      names_glue = "{topic_act_type}timespent",
+      values_from = topic_act_timespent,
+      values_fill = 0
+    )
+
+  print(paste(
+    "Sucessfully retrieved summary_progress by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+  return(month_summary_time_by_topic)
+}
+
+
+
+#' Get summary of exercises done and videos watched by each user
+#'
+#' @param summarylogs A \code{data.frame} of ContentSummarylogs
+#' @param topics A \code{data.frame} of the topics, from the get_topics \code{function}
+#' @param lower_lim Lower bound of date range, compared against the completion date
+#' @param upper_lim Upper bound of date range, compared against the completion date
+#'
+#' @return A wide \code{data.frame}: one row per user_id, one <channel>_<topic>_<kind>completed count column
+#' @export
+#'
+#' @examples
+get_month_summary_exvid_by_topic <- function(summarylogs, topics, lower_lim, upper_lim) {
+  month_summary_exvid_by_topic <- summarylogs %>%
+    dplyr::mutate(
+      completion_timestamp = as.Date(completion_timestamp)
+    ) %>%
+    dplyr::filter(
+      between(completion_timestamp, lower_lim, upper_lim)
+    ) %>%
+    dplyr::left_join(
+      topics,
+      by = c("content_id", "channel_id", "kind")
+    ) %>%
+    dplyr::count(
+      user_id,
+      channel_id,
+      topic_id, kind,
+      name = "num_completed"
+    ) %>%
+    tidyr::pivot_wider(
+      names_from = c(channel_id, topic_id, kind),
+      names_glue = "{channel_id}_{topic_id}_{kind}completed",
+      values_from = num_completed,
+      values_fill = 0
+    )
+
+  print(paste(
+    "Sucessfully retrieved summary exercises and videos by topic by user between",
+    lower_lim,
+    "and",
+    upper_lim
+  ))
+
+  return(month_summary_exvid_by_topic)
+}

From a317ddb3423149cb8a100a0ce6e9fbe6183f9b3e Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Fri, 15 Jul 2022 14:11:28 +0200
Subject: [PATCH 19/22] added flyway_bl migrate to restartko alias

(cherry picked from commit 4cb20303d713df7456f32b41a7d0155c3fa68fb7)
---
 config/.bash_aliases | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/.bash_aliases b/config/.bash_aliases
index 634cac1..2fb8099 100755
--- a/config/.bash_aliases
+++ b/config/.bash_aliases
@@ -10,7 +10,7 @@ alias whoru='cd ~/.scripts/identify/;python identify.py'
 alias alldata='~/.scripts/reporting/alldata.sh'
 alias monthend='~/.scripts/reporting/monthend.sh'
 alias monthend_swap='KOLIBRI_DATABASE_NAME=$KOLIBRI_SWAP_DB;BASELINE_DATABASE_NAME=$BASELINE_SWAP_DB;~/.scripts/reporting/monthend.sh'
-alias restartko='~/.scripts/restart_kolibri.sh;~/.baseline_testing/scripts/startup_script'
+alias restartko='~/.scripts/restart_kolibri.sh;~/.scripts/config/flyway_bl.sh migrate;~/.baseline_testing/scripts/startup_script'
 alias backup='python -m kolibri stop;sudo service nginx stop;~/.scripts/backupdb/backup.sh'
 alias upgrade='~/upgrade'
 alias tunnel='~/.scripts/ssh_tunnel/create_ssh_tunnel.sh'

From 127d2a66a85b8d77d9d31643c026298737e165ea Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Wed, 7 Sep 2022 12:10:23 +0200
Subject: [PATCH 20/22] fixed merge conflict

---
 config/add_pgtune_settings.sh | 56 +++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/config/add_pgtune_settings.sh b/config/add_pgtune_settings.sh
index 66787c8..c0c2785 100755
--- a/config/add_pgtune_settings.sh
+++ b/config/add_pgtune_settings.sh
@@ -1,27 +1,39 @@
 #!/bin/bash
 
-# Get backup of postgresql conf file
-# Remove all lines after the Add settings for extensions here line
-sudo sed -i.backup '1,/Add settings for extensions here/!d' /etc/postgresql/11/main/postgresql.conf
+DIRECTORY="/etc/postgresql/13/main"
 
-# Add the settings for 4 cores, 4GB RAM, HDD storage
+# Store path to conf file in variable
+CONF_FILE="$DIRECTORY/postgresql.conf"
 
-echo "max_connections = 200" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "shared_buffers = 1GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "effective_cache_size = 3GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "maintenance_work_mem = 256MB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "checkpoint_completion_target = 0.9" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "wal_buffers = 16MB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "default_statistics_target = 100" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "random_page_cost = 4" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "effective_io_concurrency = 2" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "work_mem = 2621kB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "min_wal_size = 1GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "max_wal_size = 4GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "max_worker_processes = 4" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "max_parallel_workers_per_gather = 2" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "max_parallel_workers = 4" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
-echo "max_parallel_maintenance_workers = 2" | sudo tee -a /etc/postgresql/11/main/postgresql.conf
+if [ ! -d "$DIRECTORY" ]; then
+	echo "Postgres 13 has not been set up. Skipping...."
+else
+	echo "Postgres 13 has been set up. Adding tuning settings"
 
-# restart the postgresql service
-sudo systemctl restart postgresql
\ No newline at end of file
+	# Back up the postgresql conf file (sed -i.backup keeps a copy with a .backup suffix)
+	# and delete every line after the "Add settings for extensions here" marker line
+	sudo sed -i.backup '1,/Add settings for extensions here/!d' "$CONF_FILE"
+
+	# Add the settings for 2 cores, 4GB RAM, HDD storage, data warehouse
+
+	echo "shared_preload_libraries = 'pg_stat_statements'" | sudo tee -a "$CONF_FILE"
+	echo "max_connections = 200" | sudo tee -a "$CONF_FILE"
+	echo "shared_buffers = 1GB" | sudo tee -a "$CONF_FILE"
+	echo "effective_cache_size = 3GB" | sudo tee -a "$CONF_FILE"
+	echo "maintenance_work_mem = 512MB" | sudo tee -a "$CONF_FILE"
+	echo "checkpoint_completion_target = 0.9" | sudo tee -a "$CONF_FILE"
+	echo "wal_buffers = 16MB" | sudo tee -a "$CONF_FILE"
+	echo "default_statistics_target = 500" | sudo tee -a "$CONF_FILE"
+	echo "random_page_cost = 4" | sudo tee -a "$CONF_FILE"
+	echo "effective_io_concurrency = 2" | sudo tee -a "$CONF_FILE"
+	echo "work_mem = 13107kB" | sudo tee -a "$CONF_FILE"
+	echo "min_wal_size = 4GB" | sudo tee -a "$CONF_FILE"
+	echo "max_wal_size = 16GB" | sudo tee -a "$CONF_FILE"
+	echo "max_worker_processes = 2" | sudo tee -a "$CONF_FILE"
+	echo "max_parallel_workers_per_gather = 1" | sudo tee -a "$CONF_FILE"
+	echo "max_parallel_workers = 2" | sudo tee -a "$CONF_FILE"
+	echo "max_parallel_maintenance_workers = 1" | sudo tee -a "$CONF_FILE"
+
+	# restart the postgresql service
+	sudo systemctl restart postgresql
+fi
\ No newline at end of file

From f026e227b48bc323cdc89d3c1ab98e41dcb08e35 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Wed, 21 Sep 2022 09:51:57 +0200
Subject: [PATCH 21/22] removed dependency on channel module table. all courses
 are numeracy for now

(cherry picked from commit b133218e1c01d77d50948ba77582aa2d1ef5c896)
---
 reporting/get_db_tables.R  | 6 +++---
 reporting/preproc_tables.R | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/reporting/get_db_tables.R b/reporting/get_db_tables.R
index e1c7452..e56ac28 100755
--- a/reporting/get_db_tables.R
+++ b/reporting/get_db_tables.R
@@ -49,9 +49,9 @@ default_facility_id <<- conn %>%
   dplyr::collect()
 
 # get module for each channel
-channel_module <<- conn %>%
-  dplyr::tbl("channel_module") %>%
-  dplyr::collect()
+# channel_module <<- conn %>%
+#   dplyr::tbl("channel_module") %>%
+#   dplyr::collect()
 
 # content summary logs
 content_summarylogs <<- conn %>%
diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R
index 6004c95..aab3685 100755
--- a/reporting/preproc_tables.R
+++ b/reporting/preproc_tables.R
@@ -79,9 +79,11 @@ if (nrow(users) == 0) {
 
 # join channel metadata to channel_module
 channel_metadata <<- channel_metadata %>%
-  dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>%
+  # dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>%
   # create new column with module and abbreviated playlist name
-  dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>%
+  # dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>%
+  # prefix the abbreviated channel name with "numeracy" (all courses are numeracy for now)
+  dplyr::mutate(abbr_name = paste0("numeracy", "_", abbreviate(name))) %>%
   # create new column with abbr name and the word progress
   # will be used as the column name for channel progress in final report
   dplyr::mutate(abbr_name_progress = paste0(abbr_name, "_progress"))

From 13a81e6c52fcf02723a385259b63e54d88247d72 Mon Sep 17 00:00:00 2001
From: Kapya Sakala <kapsakala@gmail.com>
Date: Wed, 28 Sep 2022 17:17:41 +0200
Subject: [PATCH 22/22] fixed bug with centre col when no sessionlogs exist for
 inputted month

(cherry picked from commit 8e228313c38cc5147eed75219942d0bf65360c41)
---
 reporting/check_sessionlogs.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/reporting/check_sessionlogs.R b/reporting/check_sessionlogs.R
index cfa631a..2b8e08d 100755
--- a/reporting/check_sessionlogs.R
+++ b/reporting/check_sessionlogs.R
@@ -54,7 +54,7 @@ check_sessionlogs <- function(sessionlogs, dates, device_name, all_time = FALSE)
         total_exercises = 0,
         total_videos = 0,
         month_end = month_end,
-        centre = device_name,
+        centre = device_name %>% pull(name),
         month_active = 0,
         module = "",
         total_logins = 0