Source code for mrparse.mr_topcons
"""
Created on 16 Nov 2018
@author: jmht
"""
import logging
import os
import shutil
import time
import zipfile
from mrparse.mr_annotation import AnnotationSymbol, SequenceAnnotation
from mrparse.mr_util import now, run_cmd
class OutOfTimeException(Exception):
    """Signal that a polled job exceeded its allowed wall-clock time.

    Raised by ``TMPred.run_topcons`` when the elapsed polling time is
    greater than ``max_poll_time``.
    """
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Polling defaults, in seconds: TMPred sleeps POLL_TIME between status checks
# and gives up once MAX_POLL_TIME has elapsed since submission.
POLL_TIME = 2
MAX_POLL_TIME = 120

# Annotation symbol marking a residue predicted to be in a transmembrane
# helix ('M' in the prediction string).
TM = AnnotationSymbol()
TM.symbol = 'M'
TM.name = 'TM'
TM.stype = 'Transmembrane Helix'
class TMPred(object):
    """Transmembrane-helix prediction driven by the TOPCONS client script.

    A job is submitted through the bundled ``topcons2_wsdl.py`` script, polled
    until it finishes, and the downloaded result archive is unpacked and
    parsed into a ``SequenceAnnotation`` stored on ``self.prediction``.
    """

    def __init__(self, seq_info):
        """Store the sequence information and set up polling defaults.

        Args:
            seq_info: object whose ``sequence_file`` attribute is passed to
                the TOPCONS client script by ``get_prediction``.
        """
        self.seq_info = seq_info
        self.prediction = None
        script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'scripts')
        self.topcons_script = os.path.join(script_dir, 'topcons2_wsdl.py')
        self.poll_time = POLL_TIME
        self.max_poll_time = MAX_POLL_TIME

    def parse_topcons_directory(self, results_dir):
        """Parse ``query.result.txt`` inside an unpacked results directory.

        Args:
            results_dir: path of the directory extracted from the result zip.

        Returns:
            The ``(prediction, probabilities)`` tuple produced by
            ``parse_topcons_output``.

        Raises:
            AssertionError: if ``results_dir`` is not a directory.
        """
        assert os.path.isdir(results_dir), "Cannot find directory: %s" % results_dir
        results_file = os.path.join(results_dir, 'query.result.txt')
        return self.parse_topcons_output(results_file)

    @staticmethod
    def parse_topcons_output(results_file):
        """Extract the topology string and per-residue scores from a result file.

        The line following ``TOPCONS predicted topology:`` is taken as the
        prediction string. The ``Predicted TOPCONS reliability`` section is
        read as ``seqid score`` pairs. A missing or empty reliability section
        is treated as "no scores" rather than an error.

        Args:
            results_file: path to the TOPCONS ``query.result.txt`` file.

        Returns:
            ``(prediction, probabilities)`` where ``probabilities`` holds one
            value per character of ``prediction`` (see ``fix_probabilties``).

        Raises:
            RuntimeError: if the file contains no topology prediction.
        """
        prediction = None
        scores = []
        with open(results_file) as fh:
            line = fh.readline()
            while line:
                if line.startswith('TOPCONS predicted topology:'):
                    prediction = fh.readline().strip()
                elif line.startswith('Predicted TOPCONS reliability'):
                    scores = TMPred._read_reliability(fh)
                line = fh.readline()
        if prediction is None:
            # Previously this surfaced as an UnboundLocalError.
            raise RuntimeError("Cannot find TOPCONS prediction in file: %s" % results_file)
        return prediction, TMPred.fix_probabilties(prediction, scores)

    @staticmethod
    def _read_reliability(fh):
        """Read ``(seqid, score)`` pairs following the reliability header.

        Reading stops at the first blank line, at end of file, or at the first
        line that does not split into exactly two fields.
        """
        fh.readline()  # skip the single line that follows the section header
        scores = []
        line = fh.readline().strip()
        while line:
            try:
                seqid, prob = line.split()
            except ValueError:
                break
            scores.append((int(seqid), float(prob)))
            line = fh.readline().strip()
        return scores

    def create_annotation(self, annotation, probabilties):
        """Wrap a prediction string and its scores in a ``SequenceAnnotation``.

        Args:
            annotation: the prediction string.
            probabilties: per-residue scores to store on the annotation.

        Returns:
            The populated ``SequenceAnnotation``.
        """
        ann = SequenceAnnotation()
        ann.source = 'TopCons server'
        ann.library_add_annotation(TM)
        ann.scores = probabilties
        ann.annotation = annotation
        return ann

    @staticmethod
    def fix_probabilties(prediction, probabilties):
        """Expand sparse reliability scores to one value per residue.

        Scores are only available for some residues, so the gaps are filled:
        transmembrane residues without a score receive a default of 50.0 and
        all other residues receive 0.0. Values are then divided by 100 so the
        result lies between 0.0 and 1.0.

        Args:
            prediction: prediction string, one character per residue.
            probabilties: iterable of ``(seqid, score)`` pairs with seqids
                counted from 1. May be empty. It is not modified.

        Returns:
            A new list of floats with the same length as ``prediction``.
        """
        DEFAULT_PROBABILITY = 50.0
        # Index scores by seqid. On duplicates the first entry wins, matching
        # the order in which the scores were previously consumed.
        score_by_seqid = {}
        for seqid, prob in probabilties or ():
            score_by_seqid.setdefault(seqid, prob)
        probabilities = []
        for i, pred in enumerate(prediction):
            if pred == TM.symbol:
                raw = score_by_seqid.get(i + 1, DEFAULT_PROBABILITY)  # seqids start from 1
            else:
                raw = 0.0
            # normalise to between 0.0 and 1.0
            probabilities.append(raw / 100.0 if raw > 0.0 else 0.0)
        return probabilities

    def run_topcons(self, seqin):
        """Submit a job and block until its results have been unpacked.

        Args:
            seqin: value passed to the client script's ``-seq`` option.

        Returns:
            Absolute path of the unpacked results directory.

        Raises:
            OutOfTimeException: if the job has not finished once more than
                ``self.max_poll_time`` seconds have elapsed.
        """
        jobid = self.submit_job(seqin)
        start = time.time()
        while True:
            if time.time() - start > self.max_poll_time:
                raise OutOfTimeException("Exceed maximum runtime of: %d" % self.max_poll_time)
            if self.job_finished(jobid):
                break
            time.sleep(self.poll_time)
        return self.get_results(jobid)

    def submit_job(self, seqin):
        """Submit a sequence via the client script and return the job id.

        Args:
            seqin: value passed to the client script's ``-seq`` option.

        Returns:
            The job id taken from the text after ``=`` on the submission line.

        Raises:
            RuntimeError: if no job id can be found in the script output.
        """
        cmd = [self.topcons_script, '-m', 'submit', '-seq', seqin]
        out = run_cmd(cmd)
        jobid = None
        # splitlines() copes with any line ending, unlike split(os.linesep).
        for line in out.splitlines():
            if line.startswith("You have successfully submitted your job"):
                fields = line.split('=')
                if len(fields) > 1:
                    jobid = fields[1].strip()
        if not jobid:
            raise RuntimeError("Error submitting topcons job: %s" % out)
        return jobid

    def job_finished(self, jobid):
        """Ask the client script whether a job has finished.

        Example output for a finished job::

            Your job with jobid rst_rUF74H is finished!
            The result file ./rst_rUF74H.zip has been retrieved for jobid rst_rUF74H

        Args:
            jobid: job id returned by ``submit_job``.

        Returns:
            True if the output reports the job as finished, otherwise False.

        Raises:
            RuntimeError: if the finished job id differs from ``jobid`` or the
                script reports the job id as incorrect.
        """
        cmd = [self.topcons_script, '-m', 'get', '-jobid', jobid]
        out = run_cmd(cmd)
        for line in out.splitlines():
            line = line.rstrip()
            if line.endswith("finished!"):
                fields = line.split()
                # The job id is the fifth word of the "finished" line.
                reported = fields[4] if len(fields) > 4 else None
                if reported != jobid:
                    raise RuntimeError("Error collecting topcons job: %s" % out)
                return True
            if line.endswith("Please check you typing!"):
                raise RuntimeError("Incorrect jobid: %s" % jobid)
        return False

    def get_results(self, jobid):
        """Unpack ``<jobid>.zip`` from the current working directory.

        Args:
            jobid: job id; the archive is expected at ``jobid + '.zip'``.

        Returns:
            Absolute path of the extracted ``jobid`` directory.

        Raises:
            RuntimeError: if the archive is not a valid zip file or is empty.
            AssertionError: if extraction did not create a ``jobid`` directory.
        """
        ziparchive = jobid + '.zip'
        if not zipfile.is_zipfile(ziparchive):
            raise RuntimeError('File is not a valid zip archive: {0}'.format(ziparchive))
        # Context manager guarantees the archive handle is closed.
        with zipfile.ZipFile(ziparchive) as zipf:
            if not zipf.infolist():
                raise RuntimeError('Empty zip file: {0}'.format(ziparchive))
            zipf.extractall()
        assert os.path.isdir(jobid)
        return os.path.abspath(jobid)

    def cleanup(self, results_dir):
        """Remove a results directory if it exists.

        Args:
            results_dir: path of the directory to delete.
        """
        if os.path.isdir(results_dir):
            shutil.rmtree(results_dir)

    def get_prediction(self):
        """Run TOPCONS for ``self.seq_info`` and store the annotation.

        The result is stored on ``self.prediction``; nothing is returned.
        """
        logger.debug("TMPred starting prediction at: %s", now())
        topcons_dir = self.run_topcons(self.seq_info.sequence_file)
        prediction, scores = self.parse_topcons_directory(topcons_dir)
        self.prediction = self.create_annotation(prediction, scores)
        logger.debug("TMPred finished prediction at: %s", now())
        # NOTE: the results directory is left on disk; call
        # self.cleanup(topcons_dir) here if it should be removed.