Skip to content

Commit

Permalink
Merge branch 'master' of github.com:marbl/canu
Browse files Browse the repository at this point in the history
  • Loading branch information
skoren committed Apr 16, 2018
2 parents 78ec851 + af584f5 commit 65abccb
Show file tree
Hide file tree
Showing 19 changed files with 522 additions and 391 deletions.
12 changes: 12 additions & 0 deletions documentation/source/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,18 @@ What parameters should I use for my reads?
coverage.


Can I assemble RNA sequence data?
-------------------------------------
Canu will likely mis-assemble, or completely fail to assemble, RNA data. It will, however, do a
reasonable job of generating corrected reads. Reads are corrected using (local) best
alignments to other reads, and alignments between different isoforms are usually clearly not
'best'. Just as with DNA sequences, though, similar isoforms can get 'mixed' together. We've heard
of reasonable success from users, but do not have any parameter suggestions to make.

Note that Canu will silently translate 'U' bases to 'T' bases on input, but will **NOT** translate
the output bases back to 'U'.


My assembly continuity is not good, how can I improve it?
---------------------------------------------------------
The most important determinant for assembly quality is sequence length, followed by the repeat
Expand Down
38 changes: 36 additions & 2 deletions documentation/source/parameter-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,44 @@ onFailure <string=unset>
Process Control
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. _showNext:

showNext <boolean=false>
Report the first major command that would be run, but don't run it. Processing to get to that
command, for example, checking the output of the previous command or preparing inputs for the
next command, is still performed.
command, for example, checking the output of the previous command or preparing inputs for the next
command, is still performed.

.. _stopOnReadQuality:

stopOnReadQuality <boolean=true>
If set, Canu will stop with the following error if there are significantly fewer reads or bases
loaded into the read store than what is in the input data.

::

Gatekeeper detected potential problems in your input reads.
Please review the logging in files:
/assembly/godzilla/asm.gkpStore.BUILDING.err
/assembly/godzilla/asm.gkpStore.BUILDING/errorLog
If you wish to proceed, rename the store with the following command and restart canu.
mv /assembly/godzilla/asm.gkpStore.BUILDING \
/assembly/godzilla/asm.gkpStore.ACCEPTED
Option stopOnReadQuality=false skips these checks.

The missing reads could be too short (decrease :ref:`minReadLength <minReadLength>` to include
them), or have invalid bases or quality values. A summary of the files loaded and errors detected
is in the ``asm.gkpStore.BUILDING.err`` file, with full gory details in
``asm.gkpStore.BUILDING/errorLog``.

To proceed, set ``stopOnReadQuality=false`` or rename the directory as shown.

Note that ``U`` bases are silently translated to ``T`` bases, to allow assembly of RNA sequences.

.. _stopAfter:

stopAfter <string=undefined>
If set, Canu will stop processing after a specific stage in the pipeline finishes.
Expand Down
108 changes: 29 additions & 79 deletions src/AS_UTL/AS_UTL_decodeRange.H
Original file line number Diff line number Diff line change
Expand Up @@ -38,115 +38,65 @@
using namespace std;



//  A closed interval [_bgn, _end] of values of type T.
//
//  The default constructor value-initializes both ends (zero for arithmetic
//  types).  The two-argument constructor stores the supplied ends.
//
//  operator< orders ranges lexicographically: first by _bgn, then by _end.
//  This is a strict weak ordering, so ranges can safely be sorted or stored
//  in ordered containers.
//
template<typename T>
class AS_UTL_range {
public:
  AS_UTL_range() : _bgn(), _end() {
  }
  AS_UTL_range(T bgn, T end) : _bgn(bgn), _end(end) {
  }

  bool  operator<(const AS_UTL_range<T> &that) const {
    if (_bgn < that._bgn)      //  Strictly smaller begin: we sort first.
      return(true);
    if (that._bgn < _bgn)      //  Strictly larger begin: we sort last.  Without this
      return(false);           //  test, both a<b and b<a could be true.
    return(_end < that._end);  //  Same begin: the end decides.
  }

private:
  T  _bgn;
  T  _end;
};



//  Decode the first element of a comma-separated list of numbers or
//  ranges ("5" or "3-7") from the string 'range'.
//
//  On return, 'lo' holds the first number.  'hi' holds the second number
//  if the element has the form "lo-hi", otherwise it is set equal to 'lo'.
//
//  Returns a pointer to the text following the ',' that terminates the
//  element, or NULL if the element was the last one in the string.  If the
//  element is followed by anything other than ',' or the end of the string,
//  an error is written to stderr and the process exits with status 1.
//
//  NOTE(review): relies on strtonumber() storing the parsed value in its
//  first argument and advancing the pointer passed as its third argument
//  past the digits consumed -- confirm against its definition.
//
template<typename T>
char *
AS_UTL_decodeRange(char *range, T &lo, T &hi) {
  char    *ap = range;

  strtonumber(lo, ap, &ap);     //  Grab the first number.

  hi = lo;                      //  Set the second to that.

  if (*ap == '-') {             //  If this is a range,
    ap++;                       //  grab the second number.
    strtonumber(hi, ap, &ap);
  }

  if (*ap == ',')               //  If the next letter continues
    return(ap + 1);             //  move past that and return.

  if (*ap == 0)                 //  If the next letter is the end
    return(NULL);               //  of the string, return NULL.

  //  Otherwise, we can't decode this range.

  fprintf(stderr, "ERROR: invalid range '%s'\n", range);
  exit(1);

  return(NULL);                 //  Not reached; keeps compilers quiet.
}



//  Decode a comma-separated list of numbers and ranges ("1,3-5,9") into
//  parallel vectors: for each element, its low value is appended to 'bgn'
//  and its high value to 'end'.  A single number N is stored as N in both.
//
//  A NULL or empty 'range' appends nothing.  Each element is parsed by the
//  (char *, T &, T &) overload of AS_UTL_decodeRange, which reports
//  malformed input.
//
template<typename T>
void
AS_UTL_decodeRange(char *range, std::vector<T> &bgn, std::vector<T> &end) {
  char    *ap = range;
  T        av = 0;
  T        bv = 0;

  //  The cursor tested by the loop must be the one that is advanced; the
  //  element parser returns NULL after the final element, which ends the loop.

  while ((ap != NULL) && (*ap != 0)) {
    ap = AS_UTL_decodeRange(ap, av, bv);

    bgn.push_back(av);
    end.push_back(bv);
  }
}



//  Decode a comma-separated list of numbers and ranges ("1,3-5,9") and
//  insert every individual value covered into 'ranges'; a range "lo-hi"
//  contributes lo, lo+1, ..., hi.
//
//  A NULL or empty 'range' inserts nothing.  Each element is parsed by the
//  (char *, T &, T &) overload of AS_UTL_decodeRange, which reports
//  malformed input.
//
template<typename T>
void
AS_UTL_decodeRange(char *range, std::set<T> &ranges) {
  char    *ap = range;
  T        av = 0;
  T        bv = 0;

  //  The cursor tested by the loop must be the one that is advanced; the
  //  element parser returns NULL after the final element, which ends the loop.

  while ((ap != NULL) && (*ap != 0)) {
    ap = AS_UTL_decodeRange(ap, av, bv);

    for (T xx=av; xx<=bv; xx++)
      ranges.insert(xx);
  }
}

Expand Down
Loading

0 comments on commit 65abccb

Please sign in to comment.