Skip to content

Commit

Permalink
made a new PAMFinder class
Browse files Browse the repository at this point in the history
  • Loading branch information
ryandward committed Mar 7, 2024
1 parent 38bb51c commit 2d1a877
Showing 1 changed file with 56 additions and 39 deletions.
95 changes: 56 additions & 39 deletions GenBankParser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from Bio import SeqIO
import pyranges as pr
import pandas as pd
Expand All @@ -15,13 +16,65 @@ def __init__(self, filename):
def records(self):
    """Parse the GenBank file and return its records as a dictionary.

    Opens ``self.filename`` in text mode, parses it with Biopython's
    ``"genbank"`` format and passes the parsed records to
    ``SeqIO.to_dict``, whose result is returned.
    """
    with open(self.filename, "r") as genbank_handle:
        # The parser is lazy, so the dictionary must be built while the
        # file is still open.
        parsed_records = SeqIO.parse(genbank_handle, "genbank")
        return SeqIO.to_dict(parsed_records)




class PAMFinder:
    """Fetch and validate the PAM that flanks a genomic interval.

    Args:
        records: Mapping from chromosome/record name to an object whose
            ``seq`` attribute supports slicing and ``reverse_complement()``.
        pam: PAM motif. ``N`` and the other IUPAC ambiguity codes
            (upper case) are expanded to character classes; every other
            character is kept as-is in the pattern.
        direction: ``"upstream"`` or ``"downstream"``; which side of the
            interval the PAM is read from, relative to the row's strand.
            Validated when ``get_pam_seq`` is called.
    """

    # IUPAC ambiguity codes expanded to regex character classes. "N" keeps
    # the exact expansion this class has always used.
    _IUPAC_CLASSES = str.maketrans(
        {
            "N": "[ATCG]",
            "R": "[AG]",
            "Y": "[CT]",
            "S": "[CG]",
            "W": "[AT]",
            "K": "[GT]",
            "M": "[AC]",
            "B": "[CGT]",
            "D": "[AGT]",
            "H": "[ACT]",
            "V": "[ACG]",
        }
    )

    def __init__(self, records, pam, direction):
        self.records = records
        self.pam = pam
        self.pam_length = len(pam)
        self.pam_pattern = self.pam.translate(self._IUPAC_CLASSES)
        self.direction = direction

    def get_pam_seq(self, row):
        """Return the PAM-length sequence adjacent to ``row`` as a string.

        ``row`` must expose ``Chromosome``, ``Start``, ``End`` and
        ``Strand``. ``Start`` and ``End`` are used directly as slice bounds
        on the record's sequence. For ``"-"`` strand rows the flank is
        reverse-complemented before being returned.

        Near either end of the record the result is shorter than
        ``pam_length`` (possibly empty) rather than wrapping around.

        Raises:
            KeyError: if ``row.Chromosome`` is not in ``records``.
            ValueError: if ``direction`` is not ``"upstream"`` or
                ``"downstream"``.
        """
        genome = self.records[row.Chromosome].seq

        if self.direction not in ("upstream", "downstream"):
            raise ValueError("direction must be 'upstream' or 'downstream'")

        # Upstream of a "+" row and downstream of any other row both lie
        # before Start in record coordinates; the remaining two
        # combinations lie after End.
        before_start = (self.direction == "upstream") == (row.Strand == "+")

        if before_start:
            # Clamp at 0: a negative slice start would index from the end
            # of the record instead of truncating at its beginning.
            flank_start = max(0, row.Start - self.pam_length)
            pam_sequence = genome[flank_start : row.Start]
        else:
            pam_sequence = genome[row.End : row.End + self.pam_length]

        # Report the PAM as read on the row's own strand.
        if row.Strand == "-":
            pam_sequence = pam_sequence.reverse_complement()

        return str(pam_sequence)

    def pam_matches(self, sequence):
        """Return True if the PAM pattern occurs anywhere in ``sequence``."""
        return bool(re.search(self.pam_pattern, sequence))


class GenBankParser(Logger):
def __init__(self, filename):
    """Load the GenBank records from *filename* and log what was found.

    Args:
        filename: Path handed to ``GenBankReader``.
    """
    super().__init__()
    self.reader = GenBankReader(filename)
    self.records = self.reader.records
    # NOTE: no PAMFinder is created here. Its constructor also needs a PAM
    # motif and a direction, neither of which this constructor receives.
    self.info("Found the following records:")
    self.json(self.organisms)

Expand Down Expand Up @@ -104,7 +157,7 @@ def make_fasta(self, filename):
# Write the records to a FASTA file
with open(filename, "w") as fasta_file:
SeqIO.write(self.records.values(), fasta_file, "fasta")

def find_gene_name_for_locus(self, locus_tag):
# Iterate through all records in the GenBank file
for record_id, record in self.records.items():
Expand All @@ -119,39 +172,3 @@ def find_gene_name_for_locus(self, locus_tag):
return feature.qualifiers.get("gene", [locus_tag])[0]
# Return None or locus_tag if not found; depends on how you want to handle not found cases
return None

def get_pam_sequence(self, row, pam_length, direction):
    """Return the ``pam_length`` bases flanking ``row`` as a string.

    Args:
        row: Object exposing ``Chromosome``, ``Start``, ``End`` and
            ``Strand``. ``Start`` and ``End`` are used directly as slice
            bounds on the record's sequence.
        pam_length: Number of bases to read from the flank.
        direction: ``"upstream"`` or ``"downstream"``, relative to the
            row's strand.

    Returns:
        The flanking sequence, reverse-complemented for ``"-"`` strand
        rows. Near either end of the record it is shorter than
        ``pam_length`` (possibly empty) rather than wrapping around.

    Raises:
        KeyError: if ``row.Chromosome`` is not in ``self.records``.
        ValueError: if ``direction`` is not ``"upstream"`` or
            ``"downstream"``.
    """
    genome = self.records[row.Chromosome].seq

    if direction not in ("upstream", "downstream"):
        raise ValueError("direction must be 'upstream' or 'downstream'")

    # Upstream of a "+" row and downstream of any other row both lie
    # before Start in record coordinates; the other two combinations lie
    # after End.
    before_start = (direction == "upstream") == (row.Strand == "+")

    if before_start:
        # Clamp at 0: a negative slice start would index from the end of
        # the record instead of truncating at its beginning.
        flank_start = max(0, row.Start - pam_length)
        pam_sequence = genome[flank_start : row.Start]
    else:
        pam_sequence = genome[row.End : row.End + pam_length]

    # Report the PAM as read on the row's own strand.
    if row.Strand == "-":
        pam_sequence = pam_sequence.reverse_complement()

    return str(pam_sequence)

0 comments on commit 2d1a877

Please sign in to comment.