From 119865fd1f1da98430d061a6d5a9b1be2d649531 Mon Sep 17 00:00:00 2001 From: Kapya Date: Mon, 13 Jan 2020 07:43:28 +0200 Subject: [PATCH 01/22] changed git commands to use namibia branch --- reporting/alldata.sh | 4 ++-- reporting/monthend.sh | 4 ++-- upgrade | 4 ++-- upgrade_baseline.sh | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/reporting/alldata.sh b/reporting/alldata.sh index bd0bbcc..91f70c4 100755 --- a/reporting/alldata.sh +++ b/reporting/alldata.sh @@ -25,8 +25,8 @@ export exit_bold_mode=`tput rmso` #pull latest changes from master branch in repo cd ~/.scripts -git reset --hard origin/zambia > /dev/null -git pull origin zambia > /dev/null +git reset --hard origin/namibia > /dev/null +git pull origin namibia > /dev/null # Do silent upgrade of all scripts ./upgrade_silent.sh diff --git a/reporting/monthend.sh b/reporting/monthend.sh index 61b294d..015df3b 100755 --- a/reporting/monthend.sh +++ b/reporting/monthend.sh @@ -28,8 +28,8 @@ export exit_bold_mode=`tput rmso` #pull latest changes from master branch in repo cd ~/.scripts -git reset --hard origin/zambia > /dev/null -git pull origin zambia > /dev/null +git reset --hard origin/namibia > /dev/null +git pull origin namibia > /dev/null #Do silent upgrade of all scripts ./upgrade_silent.sh > /dev/null diff --git a/upgrade b/upgrade index 8998285..87ea9fc 100755 --- a/upgrade +++ b/upgrade @@ -1,6 +1,6 @@ #!/bin/bash cd ~/.scripts -git reset --hard origin/zambia -git pull origin zambia +git reset --hard origin/namibia +git pull origin namibia ./setup.sh diff --git a/upgrade_baseline.sh b/upgrade_baseline.sh index 481246f..2985ebe 100755 --- a/upgrade_baseline.sh +++ b/upgrade_baseline.sh @@ -32,7 +32,7 @@ if [ "$?" = "0" ]; then echo "${blue}Baseline Testing already configured${reset}" cd ~/.baseline_testing/ echo "Pulling latest changes to Baseline system..." - git pull origin zambia > /dev/null + git pull origin namibia > /dev/null #make script executable if it isnt chmod +x ~/.baseline_testing/scripts/setup.sh @@ -43,7 +43,7 @@ else echo "${blue}Cloning repository...${reset}" git clone https://github.com/techZM/offline_testing.git .baseline_testing > /dev/null cd ~/.baseline_testing/ - git checkout zambia + git checkout namibia #make script executable if it isnt chmod +x ~/.baseline_testing/scripts/setup.sh From 5aba0372c8364a989aaecf682454e74d2a57042a Mon Sep 17 00:00:00 2001 From: techZM Date: Wed, 15 Jan 2020 22:53:41 +0200 Subject: [PATCH 02/22] add kolibri stop shutdown alias --- .bash_aliases | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bash_aliases b/.bash_aliases index 005eade..00eeaec 100755 --- a/.bash_aliases +++ b/.bash_aliases @@ -1,4 +1,4 @@ -alias shutdown='~/.scripts/backupdb/backup.sh;sudo shutdown -h now' +alias shutdown='kolibri stop;~/.scripts/backupdb/backup.sh;sudo shutdown -h now' alias reboot='sudo reboot' alias whoru='cd ~/.scripts/identify/;python identify.py' alias alldata='~/.scripts/reporting/alldata.sh' From 8c6addd681fb81589e9dff6cfbb02b07aaae8b26 Mon Sep 17 00:00:00 2001 From: L3Vyt <47414785+L3Vyt@users.noreply.github.com> Date: Wed, 29 Jan 2020 18:14:03 +0200 Subject: [PATCH 03/22] Update .bash_aliases --- .bash_aliases | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bash_aliases b/.bash_aliases index 00eeaec..e83a3da 100755 --- a/.bash_aliases +++ b/.bash_aliases @@ -16,5 +16,5 @@ alias restartbl='chmod +x ~/.baseline_testing/scripts/startup_script;~/.baseline alias getkousers='chmod +x ~/.baseline_testing/scripts/start_users_extraction.sh;~/.baseline_testing/scripts/start_users_extraction.sh' # assign live learners to the right groups -alias assign_learners='~/.kolibri_helper_scripts/assign_learners.sh' +alias assign_learners='~/.kolibri_helper_scripts/assign_learners.sh;chmod +x ~/.baseline_testing/scripts/startup_script;~/.baseline_testing/scripts/startup_script' From 8a21094c302eb96d79df01edadfae473731e60bc Mon Sep 17 00:00:00 2001 From: Kapya Date: Tue, 4 Aug 2020 15:44:51 +0200 Subject: [PATCH 04/22] use paste instead of paste0 --- reporting/preproc_tables.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R index 17e691b..c89f9a9 100755 --- a/reporting/preproc_tables.R +++ b/reporting/preproc_tables.R @@ -56,10 +56,10 @@ if(nrow(users) == 0){ channel_metadata <<- channel_metadata %>% left_join(channel_module, by=c("id" = "channel_id")) %>% # create new column with module and abbreviated playlist name - mutate(abbr_name = paste(module,'_',abbreviate(name))) %>% + mutate(abbr_name = paste0(module,'_',abbreviate(name))) %>% # create new column with abbr name and the word progress # will be used as the column name for channel progress in final report - mutate(abbr_name_progress = paste(abbr_name,'_progress')) + mutate(abbr_name_progress = paste0(abbr_name,'_progress')) #create named vector with channel_ids and abbreviated playlist names course_name_id <- unlist(channel_metadata$abbr_name) From 07fad8180c9cc3a4af7a8c6d8a3f623e7bf76ccb Mon Sep 17 00:00:00 2001 From: Kapya Date: Thu, 8 Oct 2020 11:05:45 +0200 Subject: [PATCH 05/22] ignore csv files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 992e600..7a83080 100755 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ backupdb/*.log crazyhours.txt +*.csv .Rproj.user config.yml *.Rproj From b6899d91eb9b84be89b8a305a38d9925a06db21a Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Mon, 15 Feb 2021 15:52:22 +0200 Subject: [PATCH 06/22] changed to namibia branch --- config/flyway_bl.sh | 4 ++-- upgrade_silent.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/flyway_bl.sh b/config/flyway_bl.sh index 6a21289..5e3af31 100755 --- a/config/flyway_bl.sh +++ b/config/flyway_bl.sh @@ -9,7 +9,7 @@ function flyway_bl(){ # Switch to baseline testing directory cd $BL_DIR || exit # Reset the code to match the remote branch - git reset --hard origin/develop + git reset --hard origin/namibia # path to migrations for baseline testing database BL_SQL_PATH=$BL_DIR/migrations @@ -25,7 +25,7 @@ function flyway_bl(){ sudo flyway -locations="filesystem:$BL_SQL_PATH" -url=jdbc:postgresql://$BASELINE_DATABASE_HOST:$BASELINE_DATABASE_PORT/$BASELINE_DATABASE_NAME -user=$BASELINE_DATABASE_USER -password=$BASELINE_DATABASE_PASSWORD "$1" # Reset the code to match the remote branch once again - git reset --hard origin/develop + git reset --hard origin/namibia # switch to the home directory or exit cd || exit } diff --git a/upgrade_silent.sh b/upgrade_silent.sh index 8fe98b3..e7fe445 100755 --- a/upgrade_silent.sh +++ b/upgrade_silent.sh @@ -43,7 +43,7 @@ fi # Check if kolibri helper scripts directory exists. pull it if it does not if [ -d "$kolibri_helper_scripts_dir" ]; then - cd $kolibri_helper_scripts_dir && git reset --hard origin/develop && git pull origin develop && cd ~ || exit + cd $kolibri_helper_scripts_dir && git reset --hard origin/namibia && git pull origin namibia && cd ~ || exit else echo "Helper scripts directory does not exist. Cloning now..." git clone https://github.com/edulution/kolibri_helper_scripts.git $kolibri_helper_scripts_dir From facff5223fe9e3cd74258a81241e618f5496c0c3 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Thu, 14 Oct 2021 10:37:32 +0200 Subject: [PATCH 07/22] delete learners on provided list when silent upgrade is triggered --- upgrade_silent.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/upgrade_silent.sh b/upgrade_silent.sh index f7f4cac..aa7a4f4 100755 --- a/upgrade_silent.sh +++ b/upgrade_silent.sh @@ -60,4 +60,7 @@ fi # Add any other scripts you would like to run below this line ################### -~/.scripts/config/add_pgtune_settings.sh \ No newline at end of file +~/.scripts/config/add_pgtune_settings.sh + +# Delete learners on provided list +python ~/.kolibri_helper_scripts/delete_learners.py -f ~/.kolibri_helper_scripts/NA_deletion_1021.csv \ No newline at end of file From f1944e0f6ca22fa2464888739439bf55ed19f97a Mon Sep 17 00:00:00 2001 From: Ntipac Date: Mon, 8 Nov 2021 11:48:22 +0200 Subject: [PATCH 08/22] Updated script from deleting any restored learners --- upgrade_silent.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/upgrade_silent.sh b/upgrade_silent.sh index aa7a4f4..f75ce03 100755 --- a/upgrade_silent.sh +++ b/upgrade_silent.sh @@ -62,5 +62,3 @@ fi ################### ~/.scripts/config/add_pgtune_settings.sh -# Delete learners on provided list -python ~/.kolibri_helper_scripts/delete_learners.py -f ~/.kolibri_helper_scripts/NA_deletion_1021.csv \ No newline at end of file From c955477e5a06b49bfcf8e0cb996f8552802c5fd3 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Mon, 15 Nov 2021 16:45:09 +0200 Subject: [PATCH 09/22] delete any loose csv files in the reports directory before beginning report extraction --- reporting/monthend.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/reporting/monthend.sh b/reporting/monthend.sh index 3bf841b..97caea4 100755 --- a/reporting/monthend.sh +++ b/reporting/monthend.sh @@ -34,6 +34,10 @@ if [[ "$psql_running" == 'Running' ]];then sudo service nginx stop > /dev/null echo "${GREEN}Extracting data for month $1${RESET}" + # Delete any loose csv files in the reports directory before extraction + cd ~/.reports + find . -type f \( -name "*.csv" \) -exec rm {} \; + echo Beginning report extraction..... # fetch the first argument given on the command line and use it as an argument to the Rscript cd ~/.scripts/reporting || exit From 9735c1e2e0d61f53e8d602b6dc6e5ed0817226dc Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Fri, 11 Feb 2022 13:51:05 +0200 Subject: [PATCH 10/22] change timezone to Windhoek on last login --- reporting/preproc_tables.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R index 2ecda2c..cfeb73d 100755 --- a/reporting/preproc_tables.R +++ b/reporting/preproc_tables.R @@ -47,7 +47,7 @@ if (nrow(users) == 0) { dplyr::rename(centre = name) %>% # Convert the last login to the nearest timezone for the centre location dplyr::mutate( - last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Lusaka") + last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Windhoek") ) %>% dplyr::select( id, From f7550d72e65a7e67bcee259176400ff7c59363ea Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Tue, 1 Mar 2022 16:32:40 +0200 Subject: [PATCH 11/22] added script to reinstall dbhelpers package without upgrading other packages (cherry picked from commit 56a13feac2e29d9575a3621ef10115085f942cbf) --- config/reinstall_dbhelpers_package.sh | 1 + 1 file changed, 1 insertion(+) create mode 100755 config/reinstall_dbhelpers_package.sh diff --git a/config/reinstall_dbhelpers_package.sh b/config/reinstall_dbhelpers_package.sh new file mode 100755 index 0000000..0885761 --- /dev/null +++ b/config/reinstall_dbhelpers_package.sh @@ -0,0 +1 @@ +sudo su - -c "R -e \"devtools::install_github('Aypak/dbhelpers', upgrade = 'never')\"" \ No newline at end of file From f1b18b628ef4bac61c277fb53e61576f0b9b8d10 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Tue, 1 Mar 2022 16:36:12 +0200 Subject: [PATCH 12/22] added dbhelpers reinstall to silent upgrade (cherry picked from commit 1f12a128645a25da369553dfed2725a495d92e5f) --- upgrade_silent.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/upgrade_silent.sh b/upgrade_silent.sh index 37261dc..9793845 100755 --- a/upgrade_silent.sh +++ b/upgrade_silent.sh @@ -63,4 +63,7 @@ fi ~/.scripts/config/add_pgtune_settings.sh # Do channel subscriptions -~/.kolibri_helper_scripts/channel_setup/insert_channel_subscriptions.sh \ No newline at end of file +~/.kolibri_helper_scripts/channel_setup/insert_channel_subscriptions.sh + +# Reinstall dbhelpers package +~/.scripts/config/reinstall_dbhelpers_package.sh \ No newline at end of file From 89b42181b41dceb5662287652713b35e7a79a131 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Tue, 1 Mar 2022 16:44:06 +0200 Subject: [PATCH 13/22] added shebang to script (cherry picked from commit 5498cd78d1d67b5073e6e94b15eb5f7ea3ecbb54) --- config/reinstall_dbhelpers_package.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/config/reinstall_dbhelpers_package.sh b/config/reinstall_dbhelpers_package.sh index 0885761..c7ce963 100755 --- a/config/reinstall_dbhelpers_package.sh +++ b/config/reinstall_dbhelpers_package.sh @@ -1 +1,2 @@ +#!/bin/bash sudo su - -c "R -e \"devtools::install_github('Aypak/dbhelpers', upgrade = 'never')\"" \ No newline at end of file From 4ec73e216f9e6dc12ec14d359df0f79c12d251f6 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Wed, 9 Mar 2022 11:32:35 +0200 Subject: [PATCH 14/22] got class_name from memberships and collections. added class_name column to users df (cherry picked from commit f3cebfbb56d5fce74117f81e4326a803ec643df6) --- reporting/preproc_tables.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R index cfeb73d..f02925f 100755 --- a/reporting/preproc_tables.R +++ b/reporting/preproc_tables.R @@ -29,6 +29,18 @@ learners_and_groups <<- memberships %>% dplyr::distinct(user_id, .keep_all = TRUE) %>% dplyr::select(name, user_id) +# Get learners and the classrooms (grades) they belong to +learners_and_grades <- memberships %>% + # filter out memberships of type learnergroup + dplyr::filter(kind == "classroom") %>% + dplyr::group_by(user_id) %>% + # If a learner belongs to multiple classes, separate them with commas + dplyr::mutate(name = paste(name, collapse = ",") %>% stringr::str_trim()) %>% + dplyr::ungroup() %>% + dplyr::distinct(user_id, .keep_all = T) %>% + dplyr::select("class_name" = name, user_id) + + # filter out admins and coaches to get list of users # select only the relevant columns users <<- facilityusers %>% @@ -45,6 +57,7 @@ if (nrow(users) == 0) { # then drop the facility_id column dplyr::left_join(facilities, by = c("facility_id" = "id")) %>% dplyr::rename(centre = name) %>% + dplyr::left_join(learners_and_grades, by = c("id"= "user_id")) %>% # Convert the last login to the nearest timezone for the centre location dplyr::mutate( last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Windhoek") @@ -55,6 +68,7 @@ if (nrow(users) == 0) { username, date_joined, last_login, + class_name, centre, facility_id ) %>% From 89e95d6e70b8807891dd6b9a0b5f984d8701151e Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Wed, 9 Mar 2022 11:32:56 +0200 Subject: [PATCH 15/22] add class_name to final report extract (cherry picked from commit 46aacd90fb02f3faceea6ac8afad1e0f796cb2d5) --- reporting/monthend.R | 1 + 1 file changed, 1 insertion(+) diff --git a/reporting/monthend.R b/reporting/monthend.R index 62ac4d4..60797f0 100755 --- a/reporting/monthend.R +++ b/reporting/monthend.R @@ -123,6 +123,7 @@ monthend <- function(dates, sessionlogs, summarylogs, topics, device_name, inclu first_name, last_name, username, + class_name, centre, total_hours, total_exercises, From e9f60d3f14fb1292a43e60c2895fd08a6555fff1 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Wed, 9 Mar 2022 14:21:41 +0200 Subject: [PATCH 16/22] defensive calls to dplyr and tidyr functions (cherry picked from commit 018a3ff8a23ddacc74fc3cf3ea4764e7c3ad7e3a) --- reporting/preproc_topics.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/reporting/preproc_topics.R b/reporting/preproc_topics.R index 2bc3fd0..592e431 100755 --- a/reporting/preproc_topics.R +++ b/reporting/preproc_topics.R @@ -12,17 +12,17 @@ get_topics <- function(contentnodes, channelmetadata) { # Filter content nodes of type topic # Where id not equal to channel_id (if id = channel_id and kind = topic, it is actually a channel) topics <- contentnodes %>% - filter( + dplyr::filter( kind == "topic", id != channel_id ) %>% - rename( + dplyr::rename( # Rename the content_id to topic_id topic_id = id, # Rename the title of the node to topic_title topic_title = title ) %>% - select( + dplyr::select( # Select the topic_id, topic_title, and channel_id topic_id, topic_title, @@ -30,24 +30,24 @@ get_topics <- function(contentnodes, channelmetadata) { ) %>% # Create a column called channel_topic # consists of channel_id and topic_id separated by underscore - mutate(channel_topic = paste0(channel_id, "_", topic_id)) + dplyr::mutate(channel_topic = paste0(channel_id, "_", topic_id)) # Join contentnodes and channelmetadata to topics topics_full <- contentnodes %>% - left_join( + dplyr::left_join( topics, c("parent_id" = "topic_id", "channel_id") ) %>% - left_join( + dplyr::left_join( # Join to channel_metadata channel_metadata, c("channel_id" = "id") ) %>% - rename( + dplyr::rename( # Rename the name column from channel_meta to channel_name channel_name = name ) %>% - select( + dplyr::select( content_id, channel_id, content_title = title, @@ -69,12 +69,12 @@ get_topics <- function(contentnodes, channelmetadata) { #' get_topic_nodes_count <- function(topics) { topic_nodes_count <- topics %>% - filter( + dplyr::filter( kind != "topic", content_id != channel_id ) %>% - count(channel_id, topic_id, kind, name = "nodes_count") %>% - unite( + dplyr::count(channel_id, topic_id, kind, name = "nodes_count") %>% + tidyr::unite( "channel_topic_kind", c("channel_id", "topic_id", "kind"), sep = "_", @@ -85,4 +85,4 @@ get_topic_nodes_count <- function(topics) { # Call the functions above and save the results to variables # channel contents and channel_metadata expected to be in global environment topics <- get_topics(channel_contents, channel_metadata) -topic_nodes_count <- get_topic_nodes_count(topics) \ No newline at end of file +topic_nodes_count <- get_topic_nodes_count(topics) From df6dbf7f706515a3f949b917c781842570bd476d Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Wed, 16 Mar 2022 13:43:41 +0200 Subject: [PATCH 17/22] added new alias to regenerate lessons and quizzes (cherry picked from commit dba7428cd31ef64e37228799669e5bc9e2c75350) --- config/.bash_aliases | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/.bash_aliases b/config/.bash_aliases index 6cf8fff..634cac1 100755 --- a/config/.bash_aliases +++ b/config/.bash_aliases @@ -25,6 +25,9 @@ alias flyway_bl='~/.scripts/config/flyway_bl.sh' # assign live learners to the right groups alias assign_learners='~/.kolibri_helper_scripts/assign_learners.sh;~/.baseline_testing/scripts/startup_script' +# Regenerate lessons and quizzes, then assign learners and restart both kolibri and baseline +alias make_quiz='kolibri stop;python ~/.kolibri_helper_scripts/setup.py;~/.kolibri_helper_scripts/assign_learners.sh;~/.scripts/restart_kolibri.sh;~/.baseline_testing/scripts/startup_script' + #Import all Kolibri channels from the internet alias update_channels='~/.kolibri_helper_scripts/channel_setup/import_channels_network.sh' From f1ecaa58ebf2100171867613a0306e2f7ef3d9cd Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Fri, 15 Jul 2022 17:17:42 +0200 Subject: [PATCH 18/22] changed timezone to match country --- reporting/get_db_tables.R | 5 + reporting/monthend.R | 342 +++++++-------- reporting/preproc_tables.R | 205 ++++----- reporting/process_dateinput.R | 70 ++-- reporting/transforms.R | 760 ++++++++++++++++++---------------- 5 files changed, 722 insertions(+), 660 deletions(-) diff --git a/reporting/get_db_tables.R b/reporting/get_db_tables.R index b100642..e1c7452 100755 --- a/reporting/get_db_tables.R +++ b/reporting/get_db_tables.R @@ -63,6 +63,11 @@ content_sessionlogs <<- conn %>% dplyr::tbl("logger_contentsessionlog") %>% dplyr::collect() +# user sessionlogs +user_sessionlogs <<- conn %>% + dplyr::tbl("logger_usersessionlog") %>% + dplyr::collect() + # get channel content channel_contents <<- conn %>% dplyr::tbl("content_contentnode") %>% diff --git a/reporting/monthend.R b/reporting/monthend.R index 60797f0..fb297b9 100755 --- a/reporting/monthend.R +++ b/reporting/monthend.R @@ -1,167 +1,175 @@ -# Clear work space -rm(list = ls()) - -# Load libraries -suppressMessages(library(timeDate)) -suppressMessages(library(tidyr)) -suppressMessages(library(plyr)) -suppressMessages(library(dplyr)) -suppressMessages(library(tools)) -suppressMessages(library(gsubfn)) -suppressMessages(library(stringr)) -suppressMessages(library(rebus)) -suppressMessages(library(dbhelpers)) - -# Source helper functions and prerequiste data into global scope -source("helpers.R") -source("get_db_tables.R") -source("preproc_tables.R") -source("preproc_topics.R") -source("check_completed_ex_vid_count.R") -source("check_sessionlogs.R") -source("transforms.R") -source("process_dateinput.R") - -# Prevent displaying warning messages from script on console(errors will still show) -options(warn = -1) - -#' Function to get data extract only for month that user inputs -#' -#' @param dates A named vector derived from the \code{process_dateinput} function, containing the start and end dates for data extraction -#' @param sessionlogs A \code{data.frame} containing ContentSessionLogs from Kolibri -#' @param summarylogs A \code{data.frame} containing ContentSummaryLogs from Kolibri -#' @param topics A \code{data.frame} containing ContentNodes of kind topic from Kolibri -#' @param device_name A vector containing the device name, derived from Collections in Kolibri -#' @param include_coach_content A \code{boolean} representing whether or not to include activity on ContentNodes flagged as coach content in the data extraction. Default value FALSE -#' -#' @return A \code{data.frame} containing activity data from between the start and end dates -#' -monthend <- function(dates, sessionlogs, summarylogs, topics, device_name, include_coach_content = FALSE) { - # Get the dates needed from the dates vector supplied - year_month <- dates$year_month - month_start <- dates$month_start - month_end <- dates$month_end - - # Add start_date_only to session logs - if (isTRUE(include_coach_content)) { - sessionlogs <- sessionlogs %>% - dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d")) - } else { - sessionlogs <- sessionlogs %>% - dplyr::filter(!content_id %in% coach_content$id) %>% - dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d")) - - summarylogs <- summarylogs %>% - dplyr::filter(!content_id %in% coach_content$id) - } - - - # Get total time spent by each user between month start and month end - time_spent_by_user <- get_time_spent_by_user(sessionlogs, month_start, month_end) - - # Get the number of distinct days a user logeed in using the start_timestamp date only - logins_by_user <- get_logins_by_user(sessionlogs, month_start, month_end) - - # Get the total number of completed exercises and videos between month start and month end - completed_ex_vid_count <- get_completed_ex_vid_count(sessionlogs, month_start, month_end) - - # get total time spent by channel - time_by_channel <- get_time_by_channel(sessionlogs, month_start, month_end) - - # Get exercises and videos completed for each channel - ex_vid_by_channel <- get_ex_vid_by_channel(sessionlogs, month_start, month_end) - - # get total_progress by channel_id for all time - prog_by_user_by_channel <- get_prog_by_user_by_channel(sessionlogs) - - # Exercises and videos completed by topic for the dates supplied - month_summary_exvid_by_topic <- get_month_summary_exvid_by_topic( - sessionlogs, - topics, - month_start, - month_end - ) - - # Time spent by topic for the dates supplied - month_summary_time_by_topic <- get_month_summary_time_by_topic( - sessionlogs, - topics, - month_start, - month_end - ) - - # Join all of the transformations together by user_id to make a complete report - rpt <- users %>% - dplyr::left_join(time_spent_by_user, by = c("id" = "user_id")) %>% - dplyr::left_join(completed_ex_vid_count, by = c("id" = "user_id")) %>% - dplyr::left_join(logins_by_user, by = c("id" = "user_id")) %>% - dplyr::left_join(time_by_channel, by = c("id" = "user_id")) %>% - dplyr::left_join(prog_by_user_by_channel, by = c("id" = "user_id")) %>% - dplyr::left_join(ex_vid_by_channel, by = c("id" = "user_id")) %>% - dplyr::left_join(month_summary_exvid_by_topic, by = c("id" = "user_id")) %>% - dplyr::left_join(month_summary_time_by_topic, by = c("id" = "user_id")) %>% - # Add new columns - dplyr::mutate( - month_active = ifelse(total_hours > 0, 1, 0), - module = rep("numeracy"), - # Set total exercises and total videos to 0 if total hours is 0 - total_exercises = replace(total_exercises, total_hours == 0, 0), - total_videos = replace(total_videos, total_hours == 0, 0), - # Derive the first name and last name columns using helper functions - first_name = dbhelpers::get_first_name(full_name), - last_name = dbhelpers::get_last_name(full_name), - # Format the month end column into a string in the form YYYY-MM-DD - month_end = rep(strftime(month_end, "%Y-%m-%d")) - ) - - # Convert id column from uuid to character string - rpt <- rpt %>% - dplyr::mutate(id = str_replace_all(id, "-", "")) %>% - # Reorder columns. put familiar columns first - dplyr::select( - id, - first_name, - last_name, - username, - class_name, - centre, - total_hours, - total_exercises, - total_videos, - month_end, - last_login, - month_active, - module, - total_logins, - everything() - ) - - # Write report to csv - write.csv( - rpt, - file = generate_filename("monthend_", year_month, device_name), - col.names = FALSE, - row.names = FALSE, - na = "0" - ) - system("echo Report extracted successfully!") - quit(save = "no") -} - -# Get user input from the command-line -input <- commandArgs(TRUE) - -# Process the user input and get a vector of dates -dates_vec <- process_dateinput(input) - -# Check if content logs exist between the month start and month end -check_sessionlogs(content_sessionlogs, dates_vec, device_name) - -# Extract the month end report -monthend( - dates = dates_vec, - sessionlogs = content_sessionlogs, - summarylogs = content_summarylogs, - topics = topics, - device_name = device_name -) \ No newline at end of file +# Clear work space +rm(list = ls()) + +# Load libraries +suppressMessages(library(timeDate)) +suppressMessages(library(tidyr)) +suppressMessages(library(plyr)) +suppressMessages(library(dplyr)) +suppressMessages(library(tools)) +suppressMessages(library(gsubfn)) +suppressMessages(library(stringr)) +suppressMessages(library(rebus)) +suppressMessages(library(dbhelpers)) + +# Source helper functions and prerequiste data into global scope +source("helpers.R") +source("get_db_tables.R") +source("preproc_tables.R") +source("preproc_topics.R") +source("check_completed_ex_vid_count.R") +source("check_sessionlogs.R") +source("transforms.R") +source("process_dateinput.R") + +# Prevent displaying warning messages from script on console(errors will still show) +options(warn = -1) + +#' Function to get data extract only for month that user inputs +#' +#' @param dates A named vector derived from the \code{process_dateinput} function, containing the start and end dates for data extraction +#' @param sessionlogs A \code{data.frame} containing ContentSessionLogs from Kolibri +#' @param usersessionlogs A \code{data.frame} containing UserSessionLogs from Kolibri +#' @param summarylogs A \code{data.frame} containing ContentSummaryLogs from Kolibri +#' @param topics A \code{data.frame} containing ContentNodes of kind topic from Kolibri +#' @param device_name A vector containing the device name, derived from Collections in Kolibri +#' @param include_coach_content A \code{boolean} representing whether or not to include activity on ContentNodes flagged as coach content in the data extraction. Default value FALSE +#' +#' @return A \code{data.frame} containing activity data from between the start and end dates +#' +monthend <- function(dates, usersessionlogs, sessionlogs, summarylogs, topics, device_name, include_coach_content = FALSE) { + # Get the dates needed from the dates vector supplied + year_month <- dates$year_month + month_start <- dates$month_start + month_end <- dates$month_end + + # Add start_date_only to session logs + if (isTRUE(include_coach_content)) { + sessionlogs <- sessionlogs %>% + dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d")) + } else { + sessionlogs <- sessionlogs %>% + dplyr::filter(!content_id %in% coach_content$id) %>% + dplyr::mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d")) + + summarylogs <- summarylogs %>% + dplyr::filter(!content_id %in% coach_content$id) + } + + + # Get total time spent by each user between month start and month end + time_spent_by_user <- get_time_spent_by_user(sessionlogs, month_start, month_end) + + # Get the number of distinct days a user logeed in using the start_timestamp date only + logins_by_user <- get_logins_by_user(usersessionlogs, month_start, month_end) + + # Get the total number of completed exercises and videos between month start and month end + completed_ex_vid_count <- get_completed_ex_vid_count(summarylogs, month_start, month_end) + + # get total time spent by channel + time_by_channel <- get_time_by_channel(sessionlogs, month_start, month_end) + + # Get exercises and videos completed for each channel + ex_vid_by_channel <- get_ex_vid_by_channel(summarylogs, month_start, month_end) + + # Get the summary progress by topic and content type + ex_vid_progress_by_topic <- get_summary_act_by_topic(summarylogs, topics, topic_nodes_count) + + # get total_progress by channel_id for all time + prog_by_user_by_channel <- get_prog_by_user_by_channel(sessionlogs) + + # Exercises and videos completed by topic for the dates supplied + month_summary_exvid_by_topic <- get_month_summary_exvid_by_topic( + summarylogs, + topics, + month_start, + month_end + ) + + # Time spent by topic for the dates supplied + month_summary_time_by_topic <- get_month_summary_time_by_topic( + sessionlogs, + topics, + month_start, + month_end + ) + + # Join all of the transformations together by user_id to make a complete report + rpt <- users %>% + dplyr::left_join(time_spent_by_user, by = c("id" = "user_id")) %>% + dplyr::left_join(completed_ex_vid_count, by = c("id" = "user_id")) %>% + dplyr::left_join(logins_by_user, by = c("id" = "user_id")) %>% + dplyr::left_join(time_by_channel, by = c("id" = "user_id")) %>% + dplyr::left_join(prog_by_user_by_channel, by = c("id" = "user_id")) %>% + dplyr::left_join(ex_vid_by_channel, by = c("id" = "user_id")) %>% + dplyr::left_join(month_summary_exvid_by_topic, by = c("id" = "user_id")) %>% + dplyr::left_join(month_summary_time_by_topic, by = c("id" = "user_id")) %>% + dplyr::left_join(ex_vid_progress_by_topic, by = c("id" = "user_id")) %>% + # Add new columns + dplyr::mutate( + month_active = ifelse(total_hours > 0, 1, 0), + module = rep("numeracy"), + # Set total exercises and total videos to 0 if total hours is 0 + total_exercises = replace(total_exercises, total_hours == 0, 0), + total_videos = replace(total_videos, total_hours == 0, 0), + # Derive the first name and last name columns using helper functions + first_name = dbhelpers::get_first_name(full_name), + last_name = dbhelpers::get_last_name(full_name), + # Format the month end column into a string in the form YYYY-MM-DD + month_end = rep(strftime(month_end, "%Y-%m-%d")) + ) + + # Convert id column from uuid to character string + rpt <- rpt %>% + dplyr::mutate(id = str_replace_all(id, "-", "")) %>% + # Reorder columns. put familiar columns first + dplyr::select( + id, + first_name, + last_name, + username, + class_name, + centre, + total_hours, + total_exercises, + total_videos, + month_end, + last_login, + month_active, + module, + total_logins, + everything() + ) %>% + # Replace NAs in numeric columns with 0 + mutate_if(is.numeric, replace_na, replace = 0) + + # Write report to csv + write.csv( + rpt, + file = generate_filename("monthend_", year_month, device_name), + col.names = FALSE, + row.names = FALSE, + na = "0" + ) + system("echo Report extracted successfully!") + quit(save = "no") +} + +# Get user input from the command-line +input <- commandArgs(TRUE) + +# Process the user input and get a vector of dates +dates_vec <- process_dateinput(input) + +# Check if content logs exist between the month start and month end +check_sessionlogs(content_sessionlogs, dates_vec, device_name) + +# Extract the month end report +monthend( + dates = dates_vec, + usersessionlogs = user_sessionlogs, + sessionlogs = content_sessionlogs, + summarylogs = content_summarylogs, + topics = topics, + device_name = device_name +) diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R index f02925f..6004c95 100755 --- a/reporting/preproc_tables.R +++ b/reporting/preproc_tables.R @@ -1,101 +1,104 @@ -suppressMessages(library(plyr)) -suppressMessages(library(dplyr)) -suppressMessages(library(lubridate)) - -# get the default facility id and from it get the device name(facility name) -default_facility_id <<- default_facility_id %>% - dplyr::pull(default_facility_id) - -# get a df of all of the facilities on the device -facilities <<- collections %>% dplyr::filter(kind == "facility") - -# keep the default facility as the device name (will be used to name the output file) -device_name <<- collections %>% - dplyr::filter(id == default_facility_id) %>% - # Get only the first 5 characters of the name - dplyr::mutate(name = str_sub(name, 1, 5)) %>% - # Select only the name column - dplyr::select(name) - -# join collections to memberships. (used for getting user groups) -memberships <<- memberships %>% - dplyr::left_join(collections, - by = c("collection_id" = "id") - ) - -# get dataframe containing learners and groups they belong to -learners_and_groups <<- memberships %>% - dplyr::filter(kind == "learnergroup") %>% - dplyr::distinct(user_id, .keep_all = TRUE) %>% - dplyr::select(name, user_id) - -# Get learners and the classrooms (grades) they belong to -learners_and_grades <- memberships %>% - # filter out memberships of type learnergroup - dplyr::filter(kind == "classroom") %>% - dplyr::group_by(user_id) %>% - # If a learner belongs to multiple classes, separate them with commas - dplyr::mutate(name = paste(name, collapse = ",") %>% stringr::str_trim()) %>% - dplyr::ungroup() %>% - dplyr::distinct(user_id, .keep_all = T) %>% - dplyr::select("class_name" = name, user_id) - - -# filter out admins and coaches to get list of users -# select only the relevant columns -users <<- facilityusers %>% - dplyr::filter(!id %in% roles$user_id) - -if (nrow(users) == 0) { - # If there are no users on the device, stop the program and inform the user - stop("No users found. Nothing to extract") -} else { - # select the relevant columns from the users df - users <<- users %>% - # join users to facilities, - # rename the facility name to centre - # then drop the facility_id column - dplyr::left_join(facilities, by = c("facility_id" = "id")) %>% - dplyr::rename(centre = name) %>% - dplyr::left_join(learners_and_grades, by = c("id"= "user_id")) %>% - # Convert the last login to the nearest timezone for the centre location - dplyr::mutate( - last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz("Africa/Windhoek") - ) %>% - dplyr::select( - id, - full_name, - username, - date_joined, - last_login, - class_name, - centre, - facility_id - ) %>% - dplyr::select(-facility_id) -} - -# join channel metadata to channel_module -channel_metadata <<- channel_metadata %>% - dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>% - # create new column with module and abbreviated playlist name - dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>% - # create new column with abbr name and the word progress - # will be used as the column name for channel progress in final report - dplyr::mutate(abbr_name_progress = paste0(abbr_name, "_progress")) - -# create named vector with channel_ids and abbreviated playlist names -course_name_id <- unlist(channel_metadata$abbr_name) -names(course_name_id) <- unlist(channel_metadata$id) - -# create named vector with abbr_name_progress and make the channel ids the names of each of the elements -course_name_id_progress <- - unlist(channel_metadata$abbr_name_progress) -names(course_name_id_progress) <- unlist(channel_metadata$id) - -# get number of content items by channel.used to compute overall progress in channel -num_contents_by_channel <<- channel_contents %>% - dplyr::filter(!kind %in% c("topic", "channel")) %>% - dplyr::count(channel_id, name = "total_items") - -coach_content <<- channel_contents %>% dplyr::filter(coach_content == TRUE) +suppressMessages(library(plyr)) +suppressMessages(library(dplyr)) +suppressMessages(library(lubridate)) + +centre_timezone <- "Africa/Windhoek" + +# get the default facility id and from it get the device name(facility name) +default_facility_id <<- default_facility_id %>% + dplyr::pull(default_facility_id) + +# get a df of all of the facilities on the device +facilities <<- collections %>% dplyr::filter(kind == "facility") + +# keep the default facility as the device name (will be used to name the output file) +device_name <<- collections %>% + dplyr::filter(id == default_facility_id) %>% + # Get only the first 5 characters of the name + dplyr::mutate(name = str_sub(name, 1, 5)) %>% + # Select only the name column + dplyr::select(name) + +# join collections to memberships. (used for getting user groups) +memberships <<- memberships %>% + dplyr::left_join(collections, + by = c("collection_id" = "id") + ) + +# get dataframe containing learners and groups they belong to +learners_and_groups <<- memberships %>% + dplyr::filter(kind == "learnergroup") %>% + dplyr::distinct(user_id, .keep_all = TRUE) %>% + dplyr::select(name, user_id) + +# Get learners and the classrooms (grades) they belong to +learners_and_grades <- memberships %>% + # filter out memberships of type learnergroup + dplyr::filter(kind == "classroom") %>% + dplyr::group_by(user_id) %>% + # If a learner belongs to multiple classes, separate them with commas + dplyr::mutate(name = paste(name, collapse = ",") %>% stringr::str_trim()) %>% + dplyr::ungroup() %>% + dplyr::distinct(user_id, .keep_all = T) %>% + dplyr::select("class_name" = name, user_id) + + +# filter out admins and coaches to get list of users +# select only the relevant columns +users <<- facilityusers %>% + dplyr::filter(!id %in% roles$user_id) + +if (nrow(users) == 0) { + # If there are no users on the device, stop the program and inform the user + stop("No users found. Nothing to extract") +} else { + # select the relevant columns from the users df + users <<- users %>% + # join users to facilities, + # rename the facility name to centre + # then drop the facility_id column + dplyr::left_join(facilities, by = c("facility_id" = "id")) %>% + dplyr::rename(centre = name) %>% + dplyr::left_join(learners_and_grades, by = c("id" = "user_id")) %>% + # Convert the last login to the nearest timezone for the centre location + dplyr::mutate( + last_login = lubridate::ymd_hms(last_login) %>% lubridate::with_tz(centre_timezone) + ) %>% + dplyr::select( + id, + full_name, + username, + date_joined, + last_login, + class_name, + centre, + facility_id + ) %>% + dplyr::select(-facility_id) +} + +# join channel metadata to channel_module +channel_metadata <<- channel_metadata %>% + dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>% + # create new column with module and abbreviated playlist name + dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>% + # create new column with abbr name and the word progress + # will be used as the column name for channel progress in final report + dplyr::mutate(abbr_name_progress = paste0(abbr_name, "_progress")) + +# create named vector with channel_ids and abbreviated playlist names +course_name_id <- unlist(channel_metadata$abbr_name) +names(course_name_id) <- unlist(channel_metadata$id) + +# create named vector with abbr_name_progress and make the channel ids the names of each of the elements +course_name_id_progress <- + unlist(channel_metadata$abbr_name_progress) + +names(course_name_id_progress) <- unlist(channel_metadata$id) + +# get number of content items by channel.used to compute overall progress in channel +num_contents_by_channel <<- channel_contents %>% + dplyr::filter(!kind %in% c("topic", "channel")) %>% + dplyr::count(channel_id, name = "total_items") + +coach_content <<- channel_contents %>% dplyr::filter(coach_content == TRUE) diff --git a/reporting/process_dateinput.R b/reporting/process_dateinput.R index 14eab71..ea92d58 100755 --- a/reporting/process_dateinput.R +++ b/reporting/process_dateinput.R @@ -1,36 +1,36 @@ -#' Turn the user date input into a named vector -#' containing year_month, month_start and month_end -#' -#' @param dateinput -#' -#' @return -#' @export -#' -#' @examples -process_dateinput <- function(dateinput) { - year_month <- dateinput - - # With user input from the command line, create complete date by prefixing with 01 - upper_limit <- paste("01-", year_month, sep = "") - - # Check if the user input is a valid month and year, and in the form mm-yy - check_date_valid(upper_limit) - - # Get month start and month end as correctly formatted strings - month_end <- as.Date( - timeLastDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d") - ) - - month_start <- as.Date( - timeFirstDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d") - ) - - # return a named vector containing year_month, month_start and month_end - dates_vec <- list( - year_month = year_month, - month_start = month_start, - month_end = month_end - ) - - return(dates_vec) +#' Turn the user date input into a named vector +#' containing year_month, month_start and month_end +#' +#' @param dateinput +#' +#' @return +#' @export +#' +#' @examples +process_dateinput <- function(dateinput) { + year_month <- dateinput + + # With user input from the command line, create complete date by prefixing with 01 + upper_limit <- paste("01-", year_month, sep = "") + + # Check if the user input is a valid month and year, and in the form mm-yy + check_date_valid(upper_limit) + + # Get month start and month end as correctly formatted strings + month_end <- as.Date( + timeLastDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d") + ) + + month_start <- as.Date( + timeFirstDayInMonth(strftime(upper_limit, "%d-%m-%y"), format = "%y-%m-%d") + ) + + # return a named vector containing year_month, month_start and month_end + dates_vec <- list( + year_month = year_month, + month_start = as.Date(month_start), + month_end = as.Date(month_end) + ) + + return(dates_vec) } \ No newline at end of file diff --git a/reporting/transforms.R b/reporting/transforms.R index bac56d5..062a5ee 100755 --- a/reporting/transforms.R +++ b/reporting/transforms.R @@ -1,357 +1,403 @@ -#' Get total time spent by each user between month start and month end -#' -#' @param sessionlogs A dataframe of contentsessionlogs -#' @param lower_lim -#' @param upper_lim -#' -#' @return -#' @export -#' -#' @examples -get_time_spent_by_user <- function(sessionlogs, lower_lim, upper_lim) { - time_spent_by_user <- sessionlogs %>% - dplyr::filter( - start_timestamp >= lower_lim, - end_timestamp <= upper_lim - ) %>% - dplyr::group_by(user_id) %>% - dplyr::summarize(total_hours = sum(time_spent) / 3600) - - print(paste( - "Sucessfully retrieved time spent by user between", - lower_lim, - "and", - upper_lim - )) - - return(time_spent_by_user) -} - -#' Get the number of distinct days a user logged in using the start_timestamp date only -#' -#' @param sessionlogs -#' @param lower_lim -#' @param upper_lim -#' -#' @return -#' @export -#' -#' @examples -get_logins_by_user <- function(sessionlogs, lower_lim, upper_lim) { - logins_by_user <- sessionlogs %>% - dplyr::filter( - start_timestamp >= lower_lim, - end_timestamp <= upper_lim - ) %>% - dplyr::distinct(user_id, start_date_only) %>% - dplyr::count(user_id, name = "total_logins") - - print(paste( - "Sucessfully retrieved logins by user between", - lower_lim, - "and", - upper_lim - )) - - return(logins_by_user) -} - - -#' Get the total number of completed exercises and videos between month start and month end -#' -#' @param sessionlogs -#' @param lower_lim -#' @param upper_lim -#' -#' @return -#' @export -#' -#' @examples -get_completed_ex_vid_count <- function(sessionlogs, lower_lim, upper_lim) { - completed_ex_vid_count <- sessionlogs %>% - dplyr::filter( - start_timestamp >= lower_lim, - end_timestamp <= upper_lim, - progress >= 0.99 - ) %>% - dplyr::count(user_id, kind, name = "count") %>% - check_completed_ex_vid_count() - - print(paste( - "Sucessfully retrieved exercises and videos completed by user between", - lower_lim, - "and", - upper_lim - )) - - return(completed_ex_vid_count) -} - - -#' Get total time spent by channel -#' -#' @param sessionlogs -#' @param lower_lim -#' @param upper_lim -#' -#' @return -#' @export -#' -#' @examples -get_time_by_channel <- function(sessionlogs, lower_lim, upper_lim) { - time_by_channel <- sessionlogs %>% - dplyr::filter( - start_timestamp >= lower_lim, - end_timestamp <= upper_lim - ) %>% - dplyr::group_by(user_id, channel_id) %>% - dplyr::summarise(total_time = sum(time_spent) / 3600) %>% - tidyr::pivot_wider(names_from = channel_id, values_from = total_time) %>% - dplyr::rename_at( - vars(-user_id), - function(x) { - paste0(x, "_playlist_timespent") - } - ) %>% - dplyr::ungroup() - - print(paste( - "Sucessfully retrieved total time by channel by user between", - lower_lim, - "and", - upper_lim - )) - - - return(time_by_channel) - # result of above is a data frame - # time spent on each channel as a separate column with the channel id as the column name - - # change column names which are channel_ids from channel_ids to readable course names - # using the named vector created outside the function - # names(time_by_channel) <- c("user_id",recode(names(time_by_channel)[-1],!!!course_name_id)) -} - - -#' Get exercises and videos completed for each channel -#' -#' @param sessionlogs -#' @param lower_lim -#' @param upper_lim -#' -#' @return -#' @export -#' -#' @examples -get_ex_vid_by_channel <- function(sessionlogs, lower_lim, upper_lim) { - ex_vid_by_channel <- sessionlogs %>% - dplyr::filter( - start_timestamp >= lower_lim, - end_timestamp <= upper_lim, - progress >= 0.99 - ) %>% - dplyr::group_by(user_id, channel_id) %>% - dplyr::count(user_id, kind, name = "count") %>% - tidyr::unite("act_channel", c(channel_id, kind)) %>% - tidyr::pivot_wider(names_from = act_channel, values_from = count) %>% - dplyr::rename_at( - vars(-user_id), - function(x) { - str_replace(x, "_exercise", "_playlist_exercise") %>% - str_replace("_video", "_playlist_video") %>% - str_replace("_document", "_playlist_document") - } - ) %>% - dplyr::ungroup() - - print(paste( - "Sucessfully retrieved exercises and videos by channel by user between", - lower_lim, - "and", - upper_lim - )) - - - return(ex_vid_by_channel) -} - - -#' Get total_progress by channel_id for all time -#' -#' @param sessionlogs -#' -#' @return -#' @export -#' -#' @examples -get_prog_by_user_by_channel <- function(sessionlogs) { - prog_by_user_by_channel <- sessionlogs %>% - dplyr::group_by(user_id, channel_id, content_id) %>% - dplyr::summarise(max_prog = max(progress)) %>% - dplyr::group_by(user_id, channel_id) %>% - dplyr::summarise(total_prog = sum(max_prog)) %>% - # join total prog by channel to number of items by channel - # used to get percent progress in channel - dplyr::left_join(num_contents_by_channel) %>% - # create a column for the percent progress by channel - dplyr::mutate(pct_progress = total_prog / total_items) %>% - # get rid of the columns for total prog and total_items - # turn the progress for each channel into a separate column - dplyr::select(-c(total_prog, total_items)) %>% - tidyr::pivot_wider(names_from = channel_id, values_from = pct_progress) %>% - dplyr::rename_at( - vars(-user_id), - function(x) { - paste0(x, "_playlist_progress") - } - ) %>% - dplyr::ungroup() - - print("Sucessfully retrieved summary channel progress by user") - - return(prog_by_user_by_channel) -} - - -#' Summary timespent and progress by topic and content kind for all time -#' -#' @param summarylogs -#' @param topics -#' @param topic_nodes_count -#' -#' @return -#' @export -#' -#' @examples -get_summary_act_by_topic <- function(summarylogs, topics, topic_nodes_count) { - summary_act_by_topic <- summarylogs %>% - dplyr::left_join( - topics, - by = c("content_id", "channel_id", "kind") - ) %>% - tidyr::unite( - "topic_act_type", - c("channel_id", "topic_id", "kind"), - sep = "_" - ) %>% - dplyr::group_by(user_id, topic_act_type) %>% - dplyr::summarise( - topic_act_timespent = sum(time_spent), - topic_act_totalprog = sum(progress) - ) %>% - dplyr::ungroup() %>% - dplyr::left_join( - topic_nodes_count, - by = c("topic_act_type" = "channel_topic_kind") - ) %>% - dplyr::mutate( - topic_act_progpct = topic_act_totalprog / nodes_count - ) %>% - # Only get user_id, topic_act_type and progpct - dplyr::select( - user_id, - topic_act_type, - topic_act_progpct - ) %>% - replace_na(list(topic_act_progpct = 0L)) - - print("Sucessfully retrieved summary activity by topic") - - return(summary_act_by_topic) -} - - - -#' Get summary of time spent by topic for each user -#' -#' @param sessionlogs -#' @param topics -#' @param lower_lim -#' @param upper_lim -#' -#' @return -#' @export -#' -#' @examples -get_month_summary_time_by_topic <- function(sessionlogs, topics, lower_lim, upper_lim) { - month_summary_time_by_topic <- sessionlogs %>% - dplyr::filter( - start_timestamp >= lower_lim, - end_timestamp <= upper_lim - ) %>% - dplyr::left_join( - topics, - by = c("content_id", "channel_id", "kind") - ) %>% - tidyr::unite( - "topic_act_type", - c("channel_id", "topic_id", "kind"), - sep = "_" - ) %>% - dplyr::group_by(user_id, topic_act_type) %>% - dplyr::summarise( - topic_act_timespent = sum(time_spent) / 3600 - ) %>% - dplyr::ungroup() %>% - dplyr::mutate(topic_act_type = str_c( - # Add the word time spent to topic_act_type - topic_act_type, "timespent" - )) %>% - tidyr::pivot_wider(names_from = topic_act_type, values_from = topic_act_timespent) - - print(paste( - "Sucessfully retrieved summary_progress by user between", - lower_lim, - "and", - upper_lim - )) - - return(month_summary_time_by_topic) -} - - - -#' Get summary of exercises done and videos watched by each user -#' -#' @param sessionlogs -#' @param topics -#' @param lower_lim -#' @param upper_lim -#' -#' @return -#' @export -#' -#' @examples -get_month_summary_exvid_by_topic <- function(sessionlogs, topics, lower_lim, upper_lim) { - month_summary_exvid_by_topic <- sessionlogs %>% - dplyr::filter( - start_timestamp >= lower_lim, - end_timestamp <= upper_lim, - progress >= 0.99 - ) %>% - dplyr::left_join( - topics, - by = c("content_id", "channel_id", "kind") - ) %>% - tidyr::unite( - "topic_act_type", - c("channel_id", "topic_id", "kind"), - sep = "_" - ) %>% - dplyr::count(user_id, topic_act_type, name = "num_completed") %>% - dplyr::ungroup() %>% - dplyr::mutate(topic_act_type = str_c( - # Add the word completed to topic_act_type - topic_act_type, "completed" - )) %>% - tidyr::pivot_wider(names_from = topic_act_type, values_from = num_completed) - - print(paste( - "Sucessfully retrieved summary exercises and videos by topic by user between", - lower_lim, - "and", - upper_lim - )) - - return(month_summary_exvid_by_topic) -} \ No newline at end of file +#' Get total time spent by each user between month start and month end +#' +#' @param sessionlogs A \code{data.frame} of ContentSessionlogs +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range#' +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_time_spent_by_user <- function(sessionlogs, lower_lim, upper_lim) { + time_spent_by_user <- sessionlogs %>% + dplyr::mutate( + start_timestamp = as.Date(start_timestamp), + end_timestamp = as.Date(end_timestamp) + ) %>% + dplyr::filter( + between(start_timestamp, lower_lim, upper_lim), + between(end_timestamp, lower_lim, upper_lim) + ) %>% + dplyr::group_by(user_id) %>% + dplyr::summarize(total_hours = sum(time_spent) / 3600) + + print(paste( + "Sucessfully retrieved time spent by user between", + lower_lim, + "and", + upper_lim + )) + + return(time_spent_by_user) +} + +#' Get the number of distinct days a user logged in using the start_timestamp date only +#' +#' @param usersessionlogs A \code{data.frame} of UserSessionlogs +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range +#' +#' @return +#' @export +#' +#' @examples +get_logins_by_user <- function(usersessionlogs, lower_lim, upper_lim) { + logins_by_user <- usersessionlogs %>% + dplyr::mutate( + start_timestamp = as.Date(start_timestamp), + last_interaction_timestamp = as.Date(last_interaction_timestamp) + ) %>% + dplyr::filter( + between(start_timestamp, lower_lim, upper_lim), + between(last_interaction_timestamp, lower_lim, upper_lim) + ) %>% + mutate(start_date_only = strftime(start_timestamp, "%Y-%m-%d")) %>% + dplyr::distinct(user_id, start_date_only) %>% + dplyr::count(user_id, name = "total_logins") + + print(paste( + "Sucessfully retrieved logins by user between", + lower_lim, + "and", + upper_lim + )) + + return(logins_by_user) +} + + +#' Get the total number of completed exercises and videos between month start and month end +#' +#' @param summarylogs A \code{data.frame} of ContentSummarylogs +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_completed_ex_vid_count <- function(summarylogs, lower_lim, upper_lim) { + completed_ex_vid_count <- summarylogs %>% + dplyr::mutate( + completion_timestamp = as.Date(completion_timestamp) + ) %>% + dplyr::filter( + between(completion_timestamp, lower_lim, upper_lim) + ) %>% + dplyr::count(user_id, kind, name = "count") %>% + check_completed_ex_vid_count() + + print(paste( + "Sucessfully retrieved exercises and videos completed by user between", + lower_lim, + "and", + upper_lim + )) + + return(completed_ex_vid_count) +} + + +#' Get number of unique attempted exercise and videos between a date range +#' +#' @param sessionlogs A \code{data.frame} of the ContentSessionlogs +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range +#' +#' @return A \code{data.frame} +#' @export +#' +get_attempted_ex_vid_count <- function(sessionlogs, lower_lim, upper_lim) { + attempted_ex_vid_count <- sessionlogs %>% + dplyr::mutate( + start_timestamp = as.Date(start_timestamp), + end_timestamp = as.Date(end_timestamp) + ) %>% + dplyr::filter( + between(start_timestamp, lower_lim, upper_lim), + between(end_timestamp, lower_lim, upper_lim) + ) %>% + dplyr::distinct(content_id, .keep_all = T) %>% + dplyr::count(user_id, kind, name = "count") %>% + check_completed_ex_vid_count() + + print(paste( + "Sucessfully retrieved exercises and videos completed by user between", + lower_lim, + "and", + upper_lim + )) + + return(attempted_ex_vid_count) +} + +#' Get total time spent by channel +#' +#' @param sessionlogs A \code{data.frame} of ContentSessionlogs +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_time_by_channel <- function(sessionlogs, lower_lim, upper_lim) { + time_by_channel <- sessionlogs %>% + dplyr::mutate( + start_timestamp = as.Date(start_timestamp), + end_timestamp = as.Date(end_timestamp) + ) %>% + dplyr::filter( + between(start_timestamp, lower_lim, upper_lim), + between(end_timestamp, lower_lim, upper_lim) + ) %>% + dplyr::group_by(user_id, channel_id) %>% + dplyr::summarise(total_time = sum(time_spent) / 3600) %>% + tidyr::pivot_wider( + names_from = channel_id, + names_glue = "{channel_id}_playlist_timespent", + values_from = total_time, + values_fill = 0 + ) %>% + dplyr::ungroup() + + print(paste( + "Sucessfully retrieved total time by channel by user between", + lower_lim, + "and", + upper_lim + )) + + + return(time_by_channel) + # result of above is a data frame + # time spent on each channel as a separate column with the channel id as the column name + + # change column names which are channel_ids from channel_ids to readable course names + # using the named vector created outside the function + # names(time_by_channel) <- c("user_id",recode(names(time_by_channel)[-1],!!!course_name_id)) +} + + +#' Get exercises and videos completed for each channel +#' +#' @param summarylogs A \code{data.frame} of ContentSummarylogs +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_ex_vid_by_channel <- function(summarylogs, lower_lim, upper_lim) { + ex_vid_by_channel <- summarylogs %>% + dplyr::mutate( + completion_timestamp = as.Date(completion_timestamp) + ) %>% + dplyr::filter( + between(completion_timestamp, lower_lim, upper_lim) + ) %>% + dplyr::count(user_id, channel_id, kind, name = "count") %>% + tidyr::pivot_wider( + names_from = c(channel_id, kind), + names_glue = "{channel_id}_playlist_{kind}", + values_from = count, + values_fill = 0 + ) + + print(paste( + "Sucessfully retrieved exercises and videos by channel by user between", + lower_lim, + "and", + upper_lim + )) + + + return(ex_vid_by_channel) +} + + +#' Get total_progress by channel_id for all time +#' +#' @param sessionlogs A \code{data.frame} of ContentSessionlogs +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_prog_by_user_by_channel <- function(sessionlogs) { + prog_by_user_by_channel <- sessionlogs %>% + dplyr::group_by(user_id, channel_id, content_id) %>% + dplyr::summarise(max_prog = max(progress)) %>% + dplyr::group_by(user_id, channel_id) %>% + dplyr::summarise(total_prog = sum(max_prog)) %>% + # join total prog by channel to number of items by channel + # used to get percent progress in channel + dplyr::left_join(num_contents_by_channel) %>% + # create a column for the percent progress by channel + dplyr::mutate(pct_progress = total_prog / total_items) %>% + # get rid of the columns for total prog and total_items + # turn the progress for each channel into a separate column + dplyr::select(-c(total_prog, total_items)) %>% + tidyr::pivot_wider( + names_from = channel_id, + names_glue = "{channel_id}_playlist_progress", + values_from = pct_progress, + values_fill = 0 + ) %>% + dplyr::ungroup() + + print("Sucessfully retrieved summary channel progress by user") + + return(prog_by_user_by_channel) +} + + +#' Summary timespent and progress by topic and content kind for all time +#' +#' @param summarylogs A \code{data.frame} of ContentSummarylogs +#' @param topics A \code{data.frame} of the topics, from the get_topics \code{function} +#' @param topic_nodes_count A \code{data.frame} of the topic_nodes_count, from the get_topic_nodes_count \code{function} +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_summary_act_by_topic <- function(summarylogs, topics, topic_nodes_count) { + summary_act_by_topic <- summarylogs %>% + dplyr::left_join( + topics, + by = c("content_id", "channel_id", "kind") + ) %>% + tidyr::unite( + "topic_act_type", + c("channel_id", "topic_id", "kind"), + sep = "_" + ) %>% + dplyr::group_by(user_id, topic_act_type) %>% + dplyr::summarise( + topic_act_timespent = sum(time_spent), + topic_act_totalprog = sum(progress) + ) %>% + dplyr::ungroup() %>% + dplyr::left_join( + topic_nodes_count, + by = c("topic_act_type" = "channel_topic_kind") + ) %>% + dplyr::mutate( + topic_act_progpct = topic_act_totalprog / nodes_count + ) %>% + # Only get user_id, topic_act_type and progpct + dplyr::select( + user_id, + topic_act_type, + topic_act_progpct + ) %>% + tidyr::pivot_wider(names_from = topic_act_type, values_from = topic_act_progpct, values_fill = 0) + + print("Sucessfully retrieved summary activity by topic") + + return(summary_act_by_topic) +} + + + +#' Get summary of time spent by topic for each user +#' +#' @param sessionlogs A \code{data.frame} of ContentSessionlogs +#' @param topics A \code{data.frame} of the topics, from the get_topics \code{function} +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_month_summary_time_by_topic <- function(sessionlogs, topics, lower_lim, upper_lim) { + month_summary_time_by_topic <- sessionlogs %>% + dplyr::mutate( + start_timestamp = as.Date(start_timestamp), + end_timestamp = as.Date(end_timestamp) + ) %>% + dplyr::filter( + between(start_timestamp, lower_lim, upper_lim), + between(end_timestamp, lower_lim, upper_lim) + ) %>% + dplyr::left_join( + topics, + by = c("content_id", "channel_id", "kind") + ) %>% + tidyr::unite( + "topic_act_type", + c("channel_id", "topic_id", "kind"), + sep = "_" + ) %>% + dplyr::group_by(user_id, topic_act_type) %>% + dplyr::summarise( + topic_act_timespent = sum(time_spent) / 3600 + ) %>% + dplyr::ungroup() %>% + tidyr::pivot_wider( + names_from = topic_act_type, + names_glue = "{topic_act_type}timespent", + values_from = topic_act_timespent, + values_fill = 0 + ) + + print(paste( + "Sucessfully retrieved summary_progress by user between", + lower_lim, + "and", + upper_lim + )) + + return(month_summary_time_by_topic) +} + + + +#' Get summary of exercises done and videos watched by each user +#' +#' @param sessionlogs A \code{data.frame} of ContentSessionlogs +#' @param topics A \code{data.frame} of the topics, from the get_topics \code{function} +#' @param lower_lim Lower bound of date range +#' @param upper_lim Upper bound of date range +#' +#' @return A \code{data.frame} +#' @export +#' +#' @examples +get_month_summary_exvid_by_topic <- function(summarylogs, topics, lower_lim, upper_lim) { + month_summary_exvid_by_topic <- summarylogs %>% + dplyr::mutate( + completion_timestamp = as.Date(completion_timestamp) + ) %>% + dplyr::filter( + between(completion_timestamp, lower_lim, upper_lim) + ) %>% + dplyr::left_join( + topics, + by = c("content_id", "channel_id", "kind") + ) %>% + dplyr::count( + user_id, + channel_id, + topic_id, kind, + name = "num_completed" + ) %>% + tidyr::pivot_wider( + names_from = c(channel_id, topic_id, kind), + names_glue = "{channel_id}_{topic_id}_{kind}completed", + values_from = num_completed, + values_fill = 0 + ) + + print(paste( + "Sucessfully retrieved summary exercises and videos by topic by user between", + lower_lim, + "and", + upper_lim + )) + + return(month_summary_exvid_by_topic) +} From a317ddb3423149cb8a100a0ce6e9fbe6183f9b3e Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Fri, 15 Jul 2022 14:11:28 +0200 Subject: [PATCH 19/22] added flyway_bl migrate to restartko alias (cherry picked from commit 4cb20303d713df7456f32b41a7d0155c3fa68fb7) --- config/.bash_aliases | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/.bash_aliases b/config/.bash_aliases index 634cac1..2fb8099 100755 --- a/config/.bash_aliases +++ b/config/.bash_aliases @@ -10,7 +10,7 @@ alias whoru='cd ~/.scripts/identify/;python identify.py' alias alldata='~/.scripts/reporting/alldata.sh' alias monthend='~/.scripts/reporting/monthend.sh' alias monthend_swap='KOLIBRI_DATABASE_NAME=$KOLIBRI_SWAP_DB;BASELINE_DATABASE_NAME=$BASELINE_SWAP_DB;~/.scripts/reporting/monthend.sh' -alias restartko='~/.scripts/restart_kolibri.sh;~/.baseline_testing/scripts/startup_script' +alias restartko='~/.scripts/restart_kolibri.sh;~/.scripts/config/flyway_bl.sh migrate;~/.baseline_testing/scripts/startup_script' alias backup='python -m kolibri stop;sudo service nginx stop;~/.scripts/backupdb/backup.sh' alias upgrade='~/upgrade' alias tunnel='~/.scripts/ssh_tunnel/create_ssh_tunnel.sh' From 127d2a66a85b8d77d9d31643c026298737e165ea Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Wed, 7 Sep 2022 12:10:23 +0200 Subject: [PATCH 20/22] fixed merge conflict --- config/add_pgtune_settings.sh | 56 +++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/config/add_pgtune_settings.sh b/config/add_pgtune_settings.sh index 66787c8..c0c2785 100755 --- a/config/add_pgtune_settings.sh +++ b/config/add_pgtune_settings.sh @@ -1,27 +1,39 @@ #!/bin/bash -# Get backup of postgresql conf file -# Remove all lines after the Add settings for extensions here line -sudo sed -i.backup '1,/Add settings for extensions here/!d' /etc/postgresql/11/main/postgresql.conf +DIRECTORY="/etc/postgresql/13/main" -# Add the settings for 4 cores, 4GB RAM, HDD storage +# Store path to conf file in variable +CONF_FILE="$DIRECTORY/postgresql.conf" -echo "max_connections = 200" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "shared_buffers = 1GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "effective_cache_size = 3GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "maintenance_work_mem = 256MB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "checkpoint_completion_target = 0.9" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "wal_buffers = 16MB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "default_statistics_target = 100" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "random_page_cost = 4" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "effective_io_concurrency = 2" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "work_mem = 2621kB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "min_wal_size = 1GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "max_wal_size = 4GB" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "max_worker_processes = 4" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "max_parallel_workers_per_gather = 2" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "max_parallel_workers = 4" | sudo tee -a /etc/postgresql/11/main/postgresql.conf -echo "max_parallel_maintenance_workers = 2" | sudo tee -a /etc/postgresql/11/main/postgresql.conf +if [ ! -d "$DIRECTORY" ]; then + echo "Postgres 13 has not been set up. Skipping...." +else + echo "Postgres 13 has been set up. Adding tuning settings" -# restart the postgresql service -sudo systemctl restart postgresql \ No newline at end of file + # Get backup of postgresql conf file + # Remove all lines after the Add settings for extensions here line + sudo sed -i.backup '1,/Add settings for extensions here/!d' "$CONF_FILE" + + # Add the settings for 2 cores, 4GB RAM, HDD storage, data warehouse + + echo "shared_preload_libraries = 'pg_stat_statements'" | sudo tee -a "$CONF_FILE" + echo "max_connections = 200" | sudo tee -a "$CONF_FILE" + echo "shared_buffers = 1GB" | sudo tee -a "$CONF_FILE" + echo "effective_cache_size = 3GB" | sudo tee -a "$CONF_FILE" + echo "maintenance_work_mem = 512MB" | sudo tee -a "$CONF_FILE" + echo "checkpoint_completion_target = 0.9" | sudo tee -a "$CONF_FILE" + echo "wal_buffers = 16MB" | sudo tee -a "$CONF_FILE" + echo "default_statistics_target = 500" | sudo tee -a "$CONF_FILE" + echo "random_page_cost = 4" | sudo tee -a "$CONF_FILE" + echo "effective_io_concurrency = 2" | sudo tee -a "$CONF_FILE" + echo "work_mem = 13107kB" | sudo tee -a "$CONF_FILE" + echo "min_wal_size = 4GB" | sudo tee -a "$CONF_FILE" + echo "max_wal_size = 16GB" | sudo tee -a "$CONF_FILE" + echo "max_worker_processes = 2" | sudo tee -a "$CONF_FILE" + echo "max_parallel_workers_per_gather = 1" | sudo tee -a "$CONF_FILE" + echo "max_parallel_workers = 2" | sudo tee -a "$CONF_FILE" + echo "max_parallel_maintenance_workers = 1" | sudo tee -a "$CONF_FILE" + + # restart the postgresql service + sudo systemctl restart postgresql +fi \ No newline at end of file From f026e227b48bc323cdc89d3c1ab98e41dcb08e35 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Wed, 21 Sep 2022 09:51:57 +0200 Subject: [PATCH 21/22] removed dependency on channel module table. all courses are numeracy for now (cherry picked from commit b133218e1c01d77d50948ba77582aa2d1ef5c896) --- reporting/get_db_tables.R | 6 +++--- reporting/preproc_tables.R | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/reporting/get_db_tables.R b/reporting/get_db_tables.R index e1c7452..e56ac28 100755 --- a/reporting/get_db_tables.R +++ b/reporting/get_db_tables.R @@ -49,9 +49,9 @@ default_facility_id <<- conn %>% dplyr::collect() # get module for each channel -channel_module <<- conn %>% - dplyr::tbl("channel_module") %>% - dplyr::collect() +# channel_module <<- conn %>% +# dplyr::tbl("channel_module") %>% +# dplyr::collect() # content summary logs content_summarylogs <<- conn %>% diff --git a/reporting/preproc_tables.R b/reporting/preproc_tables.R index 6004c95..aab3685 100755 --- a/reporting/preproc_tables.R +++ b/reporting/preproc_tables.R @@ -79,9 +79,11 @@ if (nrow(users) == 0) { # join channel metadata to channel_module channel_metadata <<- channel_metadata %>% - dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>% + # dplyr::left_join(channel_module, by = c("id" = "channel_id")) %>% # create new column with module and abbreviated playlist name - dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>% + # dplyr::mutate(abbr_name = paste0(module, "_", abbreviate(name))) %>% +# Paste the word numeracy and the abbreviated channel name (All courses are numeracy) + dplyr::mutate(abbr_name = paste0("numeracy", "_", abbreviate(name))) %>% # create new column with abbr name and the word progress # will be used as the column name for channel progress in final report dplyr::mutate(abbr_name_progress = paste0(abbr_name, "_progress")) From 13a81e6c52fcf02723a385259b63e54d88247d72 Mon Sep 17 00:00:00 2001 From: Kapya Sakala Date: Wed, 28 Sep 2022 17:17:41 +0200 Subject: [PATCH 22/22] fixed bug with centre col when no sessionlogs exist for inputted month (cherry picked from commit 8e228313c38cc5147eed75219942d0bf65360c41) --- reporting/check_sessionlogs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reporting/check_sessionlogs.R b/reporting/check_sessionlogs.R index cfa631a..2b8e08d 100755 --- a/reporting/check_sessionlogs.R +++ b/reporting/check_sessionlogs.R @@ -54,7 +54,7 @@ check_sessionlogs <- function(sessionlogs, dates, device_name, all_time = FALSE) total_exercises = 0, total_videos = 0, month_end = month_end, - centre = device_name, + centre = device_name %>% pull(name), month_active = 0, module = "", total_logins = 0