# Source code for mrparse.mr_tmhmm

"""
Created on 18 Dec 2020

@author: hlasimpk
"""
import logging
import os
from pyjob import cexec
import glob

from mrparse.mr_annotation import AnnotationSymbol, SequenceAnnotation
from mrparse.mr_util import now

# Module-level annotation symbol describing a transmembrane-helix residue.
# TMPred.create_annotation registers it on every SequenceAnnotation it builds
# via ``library_add_annotation(TM)``.
TM = AnnotationSymbol()
TM.symbol = 'M'  # single-character code used in the annotation string
TM.name = 'TM'  # short name of the symbol
TM.stype = 'Transmembrane Helix'  # descriptive label for the symbol

# Module logger, named after this module.
logger = logging.getLogger(__name__)


class TMPred(object):
    """Transmembrane-helix predictor driven by an external TMHMM executable.

    The executable is run on the sequence file referenced by ``seq_info``.
    Afterwards a ``*.annotation`` and a ``*.plot`` file are looked up in the
    current working directory, parsed, and turned into a
    ``SequenceAnnotation`` that is stored on ``self.prediction``.
    """

    def __init__(self, seq_info, tmhmm_exe="tmhmm"):
        """Store the inputs and build the path to the bundled model file.

        Args:
            seq_info: object exposing a ``sequence_file`` attribute (the path
                read by :meth:`get_prediction`).
            tmhmm_exe: name or path of the TMHMM executable to invoke.
        """
        self.seq_info = seq_info
        self.tmhmm_exe = tmhmm_exe
        self.prediction = None
        # The model file is expected in a 'scripts' directory next to this module.
        script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'scripts')
        self.tmhmm_model = os.path.join(script_dir, 'TMHMM2.0.model')

    @staticmethod
    def prepare_seqin(seqin):
        """Make sure the first line of ``seqin`` holds at least two fields.

        If the first line consists of fewer than two whitespace-separated
        tokens, `` 1`` is appended to it and the file is rewritten in place.
        This is needed because tmhmm gives an error when the FASTA header
        contains no spaces.

        The file is only rewritten when the header actually changes, and an
        empty file is left untouched.

        Args:
            seqin: path of the sequence file to check (modified in place).
        """
        with open(seqin) as f:
            lines = f.readlines()
        if not lines:
            # Nothing to fix; previously this crashed with an IndexError.
            return

        # Strip the line ending ourselves so that a header without a trailing
        # newline (single-line file) is fixed as well.
        header = lines[0].rstrip('\r\n')
        if len(header.split()) >= 2:
            return

        lines[0] = header + ' 1\n'
        with open(seqin, "w") as f:
            f.writelines(lines)

    @staticmethod
    def parse_tmhmm_output(annotation_file, plot_file):
        """Parse the TMHMM annotation and plot files.

        Args:
            annotation_file: path of the file holding the per-residue state
                string; lines starting with ``>`` are treated as headers and
                skipped, all other lines are concatenated.
            plot_file: path of the whitespace-separated table whose first
                three columns are read as the inside, membrane and outside
                values; the ``inside membrane outside`` header line and blank
                lines are skipped.

        Returns:
            tuple: ``(prediction, probabilities)`` where ``prediction`` is the
            concatenated state string and ``probabilities`` is a list with,
            for every residue, the value (kept as the string read from the
            file) of the column matching its predicted state.

        Raises:
            IndexError: if a non-blank data row has fewer than three columns
                or the plot file has fewer rows than the prediction has
                residues.
        """
        with open(annotation_file) as fh:
            prediction = "".join(
                line.strip() for line in fh if not line.startswith('>')
            )

        # One list per column: inside, membrane, outside.
        probabilties = [[], [], []]
        with open(plot_file) as fh:
            for line in fh:
                if line.startswith('inside membrane outside'):
                    continue
                fields = line.split()
                if not fields:
                    # Blank (e.g. trailing) lines used to raise IndexError.
                    continue
                probabilties[0].append(fields[0])
                probabilties[1].append(fields[1])
                probabilties[2].append(fields[2])

        probabilties = TMPred.fix_probabilties(prediction, probabilties)
        return prediction, probabilties

    @staticmethod
    def fix_probabilties(prediction, probabilties):
        """Pick, for every residue, the value of its predicted state.

        Args:
            prediction: per-residue state string; ``i`` selects the first
                column, ``M`` the second and any other character the third.
            probabilties: three equally indexed sequences (inside, membrane,
                outside).

        Returns:
            list: one value per character of ``prediction``.

        Raises:
            IndexError: if a column is shorter than ``prediction``.
        """
        column_of_state = {"i": 0, "M": 1}
        return [
            probabilties[column_of_state.get(state, 2)][i]
            for i, state in enumerate(prediction)
        ]

    def create_annotation(self, annotation, probabilties):
        """Wrap a prediction string and its scores in a ``SequenceAnnotation``.

        Args:
            annotation: per-residue state string.
            probabilties: per-residue scores stored unchanged on the result.

        Returns:
            SequenceAnnotation: annotation with source ``'TMHMM'`` and the
            module-level ``TM`` symbol registered.
        """
        # NOTE(review): every 'i' is rewritten to 'M' before storing, so those
        # residues end up carrying the TM symbol too. Behaviour preserved from
        # the original; confirm that this is intended.
        annotation = annotation.replace('i', 'M')
        ann = SequenceAnnotation()
        ann.source = 'TMHMM'
        ann.library_add_annotation(TM)
        ann.scores = probabilties
        ann.annotation = annotation
        return ann

    def run_job(self, seqin):
        """Run the TMHMM executable on ``seqin`` with the bundled model.

        Args:
            seqin: path of the sequence file passed via ``-f``.
        """
        cmd = [self.tmhmm_exe, '-f', seqin, '-m', self.tmhmm_model]
        cexec(cmd)

    @staticmethod
    def _first_match(pattern):
        """Return the first path matching ``pattern`` in the working directory.

        Raises:
            IndexError: if nothing matches. The exception type is the one the
                previous bare ``glob.glob(pattern)[0]`` produced, so existing
                handlers keep working, but it now carries an explanation.
        """
        matches = glob.glob(pattern)
        if not matches:
            raise IndexError(
                "TMHMM did not produce a file matching '%s' in %s"
                % (pattern, os.getcwd())
            )
        return matches[0]

    def get_prediction(self):
        """Run TMHMM and store the resulting annotation on ``self.prediction``.

        The sequence file is fixed up in place first (see
        :meth:`prepare_seqin`), then the executable is run and its
        ``*.annotation`` / ``*.plot`` files are read from the current working
        directory.

        Raises:
            IndexError: if no ``*.annotation`` or ``*.plot`` file is found
                after the run.
        """
        logger.debug("TMHMM starting prediction at: %s", now())
        # If there are no spaces in the fasta header, tmhmm gives an error
        self.prepare_seqin(self.seq_info.sequence_file)
        self.run_job(self.seq_info.sequence_file)

        annotation_file = self._first_match("*.annotation")
        plot_file = self._first_match("*.plot")
        prediction, scores = self.parse_tmhmm_output(annotation_file, plot_file)
        self.prediction = self.create_annotation(prediction, scores)
        logger.debug("TMHMM finished prediction at: %s", now())