Skip to content

Commit

Permalink
replacing custom graphs with GFA format
Browse files Browse the repository at this point in the history
  • Loading branch information
Will Rowe committed Mar 20, 2018
1 parent 4f9020a commit f622eb5
Show file tree
Hide file tree
Showing 17 changed files with 1,068 additions and 1,010 deletions.
47 changes: 29 additions & 18 deletions cmd/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"os"
"runtime"
"strings"
"time"

"github.com/pkg/profile"
"github.com/spf13/cobra"
Expand All @@ -44,6 +45,8 @@ var (
clip *int // maximum number of clipped bases allowed during local alignment
indexDir *string // directory containing the index files
fastq *[]string // list of FASTQ files to align
graphDir *string // directory to save gfa graphs to
defaultGraphDir = "./groot-graphs-" + string(time.Now().Format("20060102150405")) // a default graphDir
)

// the align command (used by cobra)
Expand All @@ -67,6 +70,7 @@ func init() {
clip = alignCmd.Flags().IntP("clip", "c", 5, "maximum number of clipped bases allowed during local alignment")
indexDir = alignCmd.Flags().StringP("indexDir", "i", "", "directory containing the index files")
fastq = alignCmd.Flags().StringSliceP("fastq", "f", []string{}, "FASTQ file(s) to align")
graphDir = alignCmd.PersistentFlags().StringP("graphDir", "o", defaultGraphDir, "directory to save variation graphs to")
}

/*
Expand Down Expand Up @@ -126,6 +130,12 @@ func alignParamCheck() error {
}
}
}
// setup the graphDir
if _, err := os.Stat(*graphDir); os.IsNotExist(err) {
if err := os.MkdirAll(*graphDir, 0700); err != nil {
return fmt.Errorf("can't create specified output directory")
}
}
// set number of processors to use
if *proc <= 0 || *proc > runtime.NumCPU() {
*proc = runtime.NumCPU()
Expand All @@ -138,18 +148,15 @@ func alignParamCheck() error {
The main function for the align sub-command
*/
func runAlign() {
// set up logging
logFH, err := os.OpenFile("groot-align.log", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
if err != nil {
log.Fatal(err)
}
defer logFH.Close()
log.SetOutput(logFH)
// set up profiling
if *profiling == true {
//defer profile.Start(profile.MemProfile, profile.ProfilePath("./")).Stop()
defer profile.Start(profile.ProfilePath("./")).Stop()
}
// start logging
logFH := misc.StartLogging(*logFile)
defer logFH.Close()
log.SetOutput(logFH)
log.Printf("starting the align command")
// check the supplied files and then log some stuff
log.Printf("checking parameters...")
Expand All @@ -173,7 +180,7 @@ func runAlign() {
log.Printf("\tsignature size: %d\n", info.SigSize)
log.Printf("\tJaccard similarity theshold: %0.2f\n", info.JSthresh)
log.Printf("\twindow sized used in indexing: %d\n", info.ReadLength)
log.Print("loading the variation graphs...")
log.Print("loading the groot graphs...")
graphStore := make(graph.GraphStore)
misc.ErrorCheck(graphStore.Load(*indexDir + "/index.graph"))
log.Printf("\tnumber of variation graphs: %d\n", len(graphStore))
Expand All @@ -184,7 +191,7 @@ func runAlign() {
numHF, numBucks := database.Settings()
log.Printf("\tnumber of hash functions per bucket: %d\n", numHF)
log.Printf("\tnumber of buckets: %d\n", numBucks)

///////////////////////////////////////////////////////////////////////////////////////
// create SAM references from the sequences held in the graphs
referenceMap, err := graphStore.GetRefs()
misc.ErrorCheck(err)
Expand All @@ -193,7 +200,7 @@ func runAlign() {
log.Printf("initialising alignment pipeline...")
pipeline := stream.NewPipeline()

// Init processes
// initialise processes
log.Printf("\tinitialising the processes")
dataStream := stream.NewDataStreamer()
fastqHandler := stream.NewFastqHandler()
Expand All @@ -205,12 +212,6 @@ func runAlign() {
// add in the process parameters
dataStream.InputFile = *fastq
fastqChecker.WindowSize = info.ReadLength
/* trying the trimming post-seeding for now...
if *trimSwitch {
fastqChecker.MinReadLength = *minRL
fastqChecker.MinQual = *minQual
}
*/
dbQuerier.Db = database
dbQuerier.CommandInfo = info
dbQuerier.GraphStore = graphStore
Expand All @@ -223,7 +224,7 @@ func runAlign() {
}
samWriter.RefMap = referenceMap

// Arrange pipeline processes
// arrange pipeline processes
log.Printf("\tconnecting data streams")
fastqHandler.Input = dataStream.Output
fastqChecker.Input = fastqHandler.Output
Expand All @@ -235,5 +236,15 @@ func runAlign() {
pipeline.AddProcesses(dataStream, fastqHandler, fastqChecker, dbQuerier, graphAligner, samWriter)
log.Printf("\tnumber of processes added to the alignment pipeline: %d\n", len(pipeline.Processes))
pipeline.Run()

// save the graph files
log.Printf("saving graphs to \"%v\"...", *graphDir)
counter := 0
for _, graph := range graphStore {
graphWritten, err := graph.DumpGraph(*graphDir)
misc.ErrorCheck(err)
counter += graphWritten
}
log.Printf("\tnumber of graphs that had reads align: %d\n", counter)
log.Println("finished")
} // end of align main function
}
Loading

0 comments on commit f622eb5

Please sign in to comment.