Skip to content

Commit

Permalink
change input fastq file naming convention to a more generic form
Browse files Browse the repository at this point in the history
  • Loading branch information
Leah Kemp committed Nov 2, 2021
1 parent b95b92a commit 9657608
Show file tree
Hide file tree
Showing 16 changed files with 20 additions and 19 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,11 @@ git clone https://github.com/leahkemp/smncrna_analysis_template.git
#### Fastq naming convention

```bash
sample_S*_R1.fastq.gz
sample.fastq.gz
```

- one fastq file per sample
- each file is named with the sample name (matching the sample names in the metadata file) followed by the ".fastq.gz" extension

For an example, see the test fastq files [here](./test/fastq/)

Expand Down
12 changes: 6 additions & 6 deletions diff_expression/diff_expression.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,39 +56,39 @@ mirna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_re
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix (older "_S*_R1.fastq" form, newer generic ".fastq" form) from the sample/column names
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt piRNA read counts table ("exceRpt_piRNA_ReadCounts.txt") from
# config$excerpt_merged_results_dir, using the first line as the column names
pirna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_piRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt tRNA read counts table ("exceRpt_tRNA_ReadCounts.txt") from
# config$excerpt_merged_results_dir, using the first line as the column names
trna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_tRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt circular RNA read counts table
# ("exceRpt_circularRNA_ReadCounts.txt") from config$excerpt_merged_results_dir,
# using the first line as the column names
# NOTE(review): this read passes comment.char = "" and, unlike the sibling reads,
# does not pass check.names = FALSE, so read.table's default column name checking
# applies and may alter sample column names - confirm this is intended
circrna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_circularRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
comment.char = "") %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt gencode read counts table ("exceRpt_gencode_ReadCounts.txt")
# from config$excerpt_merged_results_dir, using the first line as the column names
gencode_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_gencode_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
mirna_smrnaseq_data <- utils::read.table(base::file.path(config$smrnaseq_results_dir,
"/edgeR/miRBase_mature/mature_counts.csv"),
Expand All @@ -100,7 +100,7 @@ mirna_smrnaseq_data <- utils::read.table(base::file.path(config$smrnaseq_results
base::as.data.frame() %>%
janitor::row_to_names(row_number = 1) %>%
# remove the ".mature" suffix (older "_S*.mature" form, newer generic ".mature" form) from the sample/column names
dplyr::rename_with(~ base::sub("[_][S]\\d+[.]mature", "", .))
dplyr::rename_with(~ base::sub(".mature", "", .))
# formatting to make counts datasets consistent between smrnaseq and excerpt pipelines
mirna_smrnaseq_data <- mirna_smrnaseq_data %>%
Expand Down
12 changes: 6 additions & 6 deletions heatmaps/heatmaps.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,39 +46,39 @@ mirna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_re
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix (older "_S*_R1.fastq" form, newer generic ".fastq" form) from the sample/column names
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt piRNA read counts table ("exceRpt_piRNA_ReadCounts.txt") from
# config$excerpt_merged_results_dir, using the first line as the column names
pirna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_piRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt tRNA read counts table ("exceRpt_tRNA_ReadCounts.txt") from
# config$excerpt_merged_results_dir, using the first line as the column names
trna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_tRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt circular RNA read counts table
# ("exceRpt_circularRNA_ReadCounts.txt") from config$excerpt_merged_results_dir,
# using the first line as the column names
# NOTE(review): this read passes comment.char = "" and, unlike the sibling reads,
# does not pass check.names = FALSE, so read.table's default column name checking
# applies and may alter sample column names - confirm this is intended
circrna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_circularRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
comment.char = "") %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
# Read the exceRpt gencode read counts table ("exceRpt_gencode_ReadCounts.txt")
# from config$excerpt_merged_results_dir, using the first line as the column names
gencode_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_gencode_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))
mirna_smrnaseq_data <- utils::read.table(base::file.path(config$smrnaseq_results_dir,
"/edgeR/miRBase_mature/mature_counts.csv"),
Expand All @@ -90,7 +90,7 @@ mirna_smrnaseq_data <- utils::read.table(base::file.path(config$smrnaseq_results
base::as.data.frame() %>%
janitor::row_to_names(row_number = 1) %>%
# remove the ".mature" suffix (older "_S*.mature" form, newer generic ".mature" form) from the sample/column names
dplyr::rename_with(~ base::sub("[_][S]\\d+[.]mature", "", .))
dplyr::rename_with(~ base::sub(".mature", "", .))
# formatting to make counts datasets consistent between smrnaseq and excerpt pipelines
mirna_smrnaseq_data <- mirna_smrnaseq_data %>%
Expand Down
12 changes: 6 additions & 6 deletions prepare_counts/prepare_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,39 @@ mirna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_re
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix (older "_S*_R1.fastq" form, newer generic ".fastq" form) from the sample/column names
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))

# Read the exceRpt piRNA read counts table ("exceRpt_piRNA_ReadCounts.txt") from
# config$excerpt_merged_results_dir, using the first line as the column names
pirna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_piRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))

# Read the exceRpt tRNA read counts table ("exceRpt_tRNA_ReadCounts.txt") from
# config$excerpt_merged_results_dir, using the first line as the column names
trna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_tRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))

# Read the exceRpt circular RNA read counts table
# ("exceRpt_circularRNA_ReadCounts.txt") from config$excerpt_merged_results_dir,
# using the first line as the column names
# NOTE(review): this read passes comment.char = "" and, unlike the sibling reads,
# does not pass check.names = FALSE, so read.table's default column name checking
# applies and may alter sample column names - confirm this is intended
circrna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_circularRNA_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
comment.char = "") %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))

# Read the exceRpt gencode read counts table ("exceRpt_gencode_ReadCounts.txt")
# from config$excerpt_merged_results_dir, using the first line as the column names
gencode_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_gencode_ReadCounts.txt"),
header = TRUE,
stringsAsFactors = FALSE,
check.names = FALSE) %>%
# remove the fastq suffix from the sample/column names: the first pattern targets
# the older "_S<digits>_R1.fastq" suffix, the second the generic ".fastq" suffix
# NOTE(review): the "." in ".fastq" is an unescaped regex wildcard and sub() only
# replaces the first match - consider "\\.fastq" or fixed = TRUE; confirm
dplyr::rename_with(~ base::sub("[_][S]\\d+[_]R1.fastq", "", .))
dplyr::rename_with(~ base::sub(".fastq", "", .))

mirna_smrnaseq_data <- utils::read.table(base::file.path(config$smrnaseq_results_dir,
"/edgeR/miRBase_mature/mature_counts.csv"),
Expand All @@ -60,7 +60,7 @@ mirna_smrnaseq_data <- utils::read.table(base::file.path(config$smrnaseq_results
base::as.data.frame() %>%
janitor::row_to_names(row_number = 1) %>%
# remove the ".mature" suffix (older "_S*.mature" form, newer generic ".mature" form) from the sample/column names
dplyr::rename_with(~ base::sub("[_][S]\\d+[.]mature", "", .))
dplyr::rename_with(~ base::sub(".mature", "", .))

# formatting to make counts datasets consistent between smrnaseq and excerpt pipelines
mirna_smrnaseq_data <- mirna_smrnaseq_data %>%
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 9657608

Please sign in to comment.