Skip to content

Commit

Permalink
Merge pull request NAL-i5K#43 from NAL-i5K/add_appveyor
Browse files Browse the repository at this point in the history
Add appveyor
  • Loading branch information
dytk2134 authored Jun 25, 2018
2 parents 8644f6a + 989e876 commit 8a9ead2
Show file tree
Hide file tree
Showing 25 changed files with 189 additions and 39,799 deletions.
40 changes: 40 additions & 0 deletions .appveyor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
image:
- Visual Studio 2013
- Visual Studio 2015
- Visual Studio 2017

environment:
matrix:
- PYTHON: C:\Python27-x64


# scripts that are called at very beginning, before repo cloning
init:
- git config --global core.autocrlf input

# clone directory
clone_folder: c:\projects\GFF3toolkit

platform: x64

configuration: Release

services:
- postgresql96

install:
- cd c:\projects\GFF3toolkit
- pip install .

build: off

test_script:
- gff3_QC -g example_file/example.gff3 -f example_file/reference.fa -o error.txt
- gff3_fix -qc_r error.txt -g example_file/example.gff3 -og corrected.gff3
- gff3_merge -g1 example_file/new_models.gff3 -g2 example_file/reference.gff3 -f example_file/reference.fa -og merged.gff -r merged_report.txt
- gff3_merge -g1 example_file/new_models_w_replace.gff3 -g2 example_file/reference.gff3 -f example_file/reference.fa -og merged.gff -r merged_report.txt -noAuto
- gff3_sort -g example_file/example.gff3 -og example-sorted.gff3
- ps: Write-Host "Test scripts are finished ..."

deploy: false

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# GFF3toolkit - Python programs for processing GFF3 files

[![Build Status](https://travis-ci.org/NAL-i5K/GFF3toolkit.svg?branch=master)](https://travis-ci.org/NAL-i5K/GFF3toolkit)
[![Build status](https://ci.appveyor.com/api/projects/status/0do5uwu5je0gag1u?svg=true)](https://ci.appveyor.com/project/hsiaoyi0504/gff3toolkit)

## Background

Expand Down
11 changes: 3 additions & 8 deletions gff3tool/lib/ERROR/ERROR.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,11 @@
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

import sys

# try to import from project first
from os.path import dirname
if dirname(__file__) == '':
lib_path = '../lib'
else:
lib_path = dirname(__file__) + '/../lib'
sys.path.insert(1, lib_path)
from gff3tool.bin import version

__version__ = '0.0.1'
__version__ = version.__version__

INFO = {
'Ema0001': 'Parent feature start and end coordinates exceed those of child features',
Expand Down
1 change: 0 additions & 1 deletion gff3tool/lib/auto_assignment/blastn

This file was deleted.

31 changes: 17 additions & 14 deletions gff3tool/lib/auto_assignment/create_annotation_summaries_nov21-7.pl
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@

open FI, "$transcript_type" or die "[Error] Cannot open $transcript_type.";
while (<FI>){
chomp $_;
chomp $_;
$_ =~ s/\R//g;
$trans_type{$_} = $_;
}
close FI;
}
close FI;

while ( my $line = <$GFF> ){
chomp $line;
$line =~ s/\R//g;
#ignore commented lines
if ( $line =~ /^#/ ){
next;
Expand All @@ -71,7 +73,7 @@
my $stop = $array[4];
my $id = "NA";
my $owner = "NA";
my $name= "NA";
my $name= "NA";
my $symbol="NA";
my $mod_date="NA";
my $comments="NA";
Expand Down Expand Up @@ -109,7 +111,7 @@
}
#populate gene/pseudogene hash
if ( $array[2] =~ /gene|pseudogene/ ){
#in gene hash, key: id; value: name, SO type, date modified, notes.
#in gene hash, key: id; value: name, SO type, date modified, notes.
$gene_ids{$id} = "$name\t$id\t$array[2]\t$mod_date\t$comments\t$replace\t$status";
}
#populate transcript hash
Expand All @@ -121,13 +123,13 @@
elsif ($parent eq "NA" ){
$gene_ids{$parent} = "NA\tNA\tNA\tNA\tNA\tNA\tNA";
my $link = "https://apollo.nal.usda.gov/".$species_code."/jbrowse/?loc=".$scaffold."%3A".$start."..".$stop."&tracks=DNA%2CAnnotations%2C".$species_code."_current_models&highlight=";
$transcript_ids{$id} = "$gene_ids{$parent}\t$owner\t$scaffold\t$start\t$stop\t$strand\t$array[2]\t$name\t$id\t$comments\t$replace\t$status\t$link";
$transcript_ids{$id} = "$gene_ids{$parent}\t$owner\t$scaffold\t$start\t$stop\t$strand\t$array[2]\t$name\t$id\t$comments\t$replace\t$status\t$link";
}
else {
warn "parents and children out of synch here:\n$parent\t$id\n";
}
}
#populate transcript_ids hash w sequence mods (top-level, no parents, no children)
#populate transcript_ids hash w sequence mods (top-level, no parents, no children)
elsif ( $array[2] =~ /deletion|insertion|substitution|transposable_element/ ){
my $link = "https://apollo.nal.usda.gov/".$species_code."/jbrowse/?loc=".$scaffold."%3A".$start."..".$stop."&tracks=DNA%2CAnnotations%2C".$species_code."_current_models&highlight=";
$transcript_ids{$id} = "NA\t$name\t$array[2]\t$mod_date\tNA\tNA\tNA\t$owner\t$scaffold\t$start\t$stop\t$strand\t$array[2]\tNA\t$id\tNA\tNA\tNA\t$link";
Expand Down Expand Up @@ -170,7 +172,7 @@
$CDS_phase{$parent}{$stop} = $array[7];
}
}
#already pre-populate true stop coordinate hash for proper aa length calculation (accounting for stop codons); complete value in next section. id -> scaf -> dir -> stop
#already pre-populate true stop coordinate hash for proper aa length calculation (accounting for stop codons); complete value in next section. id -> scaf -> dir -> stop
$cds_true_stop_coordinate{$parent}{$scaffold}{$array[6]} = 1;
}
elsif ( $array[2] =~ /exon/ ){
Expand All @@ -187,8 +189,8 @@
}
}

#will have to slice out string from last 3 bases of CDS (note that this is specific to Web Apollo coding - not all gff3s code their stop codons as CDS) and see whether it matches stop codons (or their reverse complement, depending on strand)
#TAG, TAA, TGA
#will have to slice out string from last 3 bases of CDS (note that this is specific to Web Apollo coding - not all gff3s code their stop codons as CDS) and see whether it matches stop codons (or their reverse complement, depending on strand)
#TAG, TAA, TGA
close $GFF;


Expand All @@ -197,6 +199,7 @@
my $defline;
while ( my $fline = <$FASTA> ){
chomp $fline;
$fline =~ s/\R//g;
if ( $fline =~ /^>(\S+)/ ){
$defline = $1;
}
Expand All @@ -205,14 +208,14 @@
}
}

#id -> scaf -> dir -> 1 (should be populated with true stop coordinate)
#id -> scaf -> dir -> 1 (should be populated with true stop coordinate)
#code to determine whether stop codon is present in CDS and calculate true aa sequence length
#WIP: STILL NO CODE TO INCORPORATE PHASE
foreach my $cds ( keys %cds_true_stop_coordinate ){
foreach my $scafkey ( keys %{$cds_true_stop_coordinate{$cds}} ){
if ( defined $fasta{$scafkey} ){
foreach my $dirkey ( keys %{$cds_true_stop_coordinate{$cds}{$scafkey}} ){
if ( $dirkey eq "+" ){
if ( $dirkey eq "+" ){
#for forward strand, stop coordinate in CDS_stop is true stop coordinate, and coordinate in CDS_start is true start coordinate
my $forward_stop = $CDS_stop{$cds};
my $stop_slice = substr( $fasta{$scafkey}, ($forward_stop -3), 3);
Expand Down Expand Up @@ -278,7 +281,7 @@
if ( defined $num_exon_introns{$key} ){
#if it has CDS
if ( defined $aas{$key} and defined $CDS_start{$key} and defined $CDS_stop{$key} ){
#if it has a readthrough stop codon
#if it has a readthrough stop codon
if ( defined $stop_codon_readthrough{$key} ){
print $OUT "$transcript_ids{$key}\t$aas{$key}\t$CDS_start{$key}\t$CDS_stop{$key}\t$num_cds_introns{$key}\t$num_exon_introns{$key}\thas_readthrough_stop_codon\n";
}
Expand All @@ -291,7 +294,7 @@
print $OUT "$transcript_ids{$key}\tNA\tNA\tNA\tNA\tNA\tNA\t$num_exon_introns{$key}\tNA\n";
}
}
#if it doesn't have exons
#if it doesn't have exons
else {
print $OUT "$transcript_ids{$key}\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\n";
}
Expand Down
19 changes: 11 additions & 8 deletions gff3tool/lib/auto_assignment/find_match.pl
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,17 @@
open FI, "$transcript_type" or die "[Error] Cannot open $transcript_type.";
while (<FI>){
chomp $_;
$_ =~ s/\R//g;
$trans_type{$_} = $_;
}
close FI;
}
close FI;

print "Reading the gff file: $gff...\n";
open FI, "$gff" or die "[Error] Cannot open $gff.";
while (<FI>){
$line++;
chomp $_;
$_ =~ s/\R//g;
if ($_ =~ /^#/){next;}
my @t = split("\t", $_);
if ($#t != 8){next;}
Expand Down Expand Up @@ -134,6 +136,7 @@
open FI, "$blast" or die "[Error] Cannot open $blast.";
while (<FI>){
chomp $_;
$_ =~ s/\R//g;
if ($_ =~ /^$/){ print "blast result is empty..."; exit; } # Check whether the blast result is empty...
my @t = split("\t", $_);
$#t!=11 and next;
Expand All @@ -144,7 +147,7 @@
}elsif ($t[0] =~ /\|Parent=(.+?)\|ID=(.+?)\|/ or $t[0] =~ /\|Parent=(.+?)\|ID=(.+?)$/){
($qpar, $qid) = ($1, $2);
}


if ($t[1] !~ /Parent/){
$t[1] =~ /ID=(.+?)\|/;
Expand Down Expand Up @@ -209,15 +212,15 @@
foreach my $e (sort keys %diffparent){
my @pid = split("\t", $diffparent{$e}->{PAR});
my @t = split("\t", $diffparent{$e}->{BEST});
$t[0] =~ /(.+?):(\d+)\.\.(\d+):(.)\|/;
my ($scaf1, $s1, $e1, $d1) = ($1, $2, $3, $4);
$t[1] =~ /(.+?):(\d+)\.\.(\d+):(.)\|/;
$t[0] =~ /(.+?):(\d+)\.\.(\d+):(.)\|/;
my ($scaf1, $s1, $e1, $d1) = ($1, $2, $3, $4);
$t[1] =~ /(.+?):(\d+)\.\.(\d+):(.)\|/;
my ($scaf2, $s2, $e2, $d2) = ($1, $2, $3, $4);

if ( !defined $gene2url{$pid[0]} ){
$gene2url{$pid[0]} = 'Unassigned';
}


if ($scaf1 eq $scaf2 && $d1 eq $d2){
if (($s1 >= $s2 && $s1 <= $e2) || ($s2 >= $s1 && $s2 <= $e1)){
Expand Down
4 changes: 4 additions & 0 deletions gff3tool/lib/auto_assignment/gen_spreadsheet.pl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
my $head = <FI>;
while (<FI>){
chomp $_;
$_ =~ s/\R//g;
my @t = split("\t", $_);
if (defined $hit{$t[2]}){
$hit{$t[2]} .= ",$t[5]";
Expand All @@ -29,6 +30,7 @@
$head = <FI>;
while (<FI>){
chomp $_;
$_ =~ s/\R//g;
my @t = split("\t", $_);
if (defined $hit2{$t[2]}){
$hit2{$t[2]} .= ",$t[5]";
Expand Down Expand Up @@ -57,6 +59,8 @@
}
}
while (<FI>){
chomp $_;
$_ =~ s/\R//g;
my @t = split("\t", $_);
chomp @t;
my @info = ();
Expand Down
1 change: 0 additions & 1 deletion gff3tool/lib/auto_assignment/makeblastdb

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
while (<FI>){
$line++;
chomp $_;
$_ =~ s/\R//g;
$_ =~ s/\R//g;
if ( $_ =~ /^##FASTA/ ){last;}#go to end of file if there's a FASTA section in the gff3 file
elsif ($_ =~ /^#/){next;}
my @t = split("\t", $_);
Expand Down Expand Up @@ -101,6 +101,7 @@
open FI, "$blast" or die "[Error] Cannot open $blast.";
while (<FI>){
chomp $_;
$_ =~ s/\R//g;
if ($_ =~ /^$/){ print "blast result is empty..."; exit; } # Check whether the blast result is empty...
my @t = split("\t", $_);
$#t!=11 and next;
Expand Down
16 changes: 2 additions & 14 deletions gff3tool/lib/function4gff/function4gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,6 @@
QC functions for processing multiple features between models (inter-model) in GFF3 file.
"""
from __future__ import print_function

#from collections import OrderedDict # not available in 2.6
from itertools import groupby
try:
from urllib import quote, unquote
except ImportError:
from urllib.parse import quote, unquote
import sys
import re
import logging
Expand All @@ -25,14 +18,9 @@
lh = logging.StreamHandler()
lh.setFormatter(logging.Formatter('%(levelname)-8s %(message)s'))
logger.addHandler(lh)
from os.path import dirname
if dirname(__file__) == '':
lib_path = '../../lib'
else:
lib_path = dirname(__file__) + '/../../lib'
sys.path.insert(1, lib_path)
from gff3tool.bin import version

__version__ = '0.0.1'
__version__ = version.__version__

def randomID(size=32, chars=string.ascii_uppercase + string.digits):
return ''.join(random.choice(chars) for _ in range(size))
Expand Down
Loading

0 comments on commit 8a9ead2

Please sign in to comment.