Source code for mrparse.mr_jpred

"""
Created on 14 Nov 2018

@author: jmht
"""
import logging
import os
import re
import shutil
import tarfile

from mrparse.mr_annotation import AnnotationSymbol, SequenceAnnotation
from mrparse.mr_util import now, run_cmd

JPRED_SUBMISSION_EMAIL = 'jens.thomas@liverpool.ac.uk'

logger = logging.getLogger(__name__)
#logger.addHandler(logging.NullHandler()

# Module-level annotation symbols that JPred.create_annotation registers on
# every SequenceAnnotation it builds (via library_add_annotation).

# Symbol 'H', described as an alpha helix.
# NOTE(review): presumably 'H' is the character JPred emits for helix in the
# jnetpred string - confirm against a real .concise file.
HELIX = AnnotationSymbol()
HELIX.symbol = 'H'
HELIX.stype = 'Alpha Helix'
HELIX.name = 'helix'

# Symbol 'E', described as a strand.
# NOTE(review): presumably 'E' is the character JPred emits for strand in the
# jnetpred string - confirm against a real .concise file.
SHEET = AnnotationSymbol()
SHEET.symbol = 'E'
SHEET.stype = 'Strand'
SHEET.name = 'strand'


class OutOfTimeException(Exception):
    """Exception type for out-of-time conditions.

    NOTE(review): nothing in the code visible in this module raises or
    catches it - confirm where it is used before relying on it.
    """
class JPred(object):
    """Secondary-structure prediction through the JPred server.

    Drives the ``jpredapi`` script (found in the ``scripts`` directory next to
    this module) to submit a sequence file and fetch the results archive, then
    converts the ``jnetpred`` line of the ``.concise`` results file into a
    SequenceAnnotation.

    Attributes set by ``__init__``:
        seq_info: object whose ``sequence_file`` attribute is read by
            ``get_prediction``.
        jpred_script: path of the ``jpredapi`` script.
        prediction: result of the last successful ``get_prediction`` call.
        exception: message or exception recorded when ``get_prediction`` fails
            before the results are unpacked.
    """

    def __init__(self, seq_info=None):
        self.seq_info = seq_info
        script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'scripts')
        self.jpred_script = os.path.join(script_dir, 'jpredapi')
        self.prediction = None
        self.exception = None

    def find_results_file(self, jpred_rundir):
        """Return the path of the ``.concise`` file inside *jpred_rundir*.

        Raises
        ------
        RuntimeError
            If *jpred_rundir* is not a directory or contains no file ending in
            ``.concise``.
        """
        if not os.path.isdir(jpred_rundir):
            raise RuntimeError("Cannot find directory:%s" % jpred_rundir)
        # os.listdir order is arbitrary; sort so the pick is deterministic
        # should more than one file match.
        concise_files = sorted(f for f in os.listdir(jpred_rundir) if f.endswith('.concise'))
        if not concise_files:
            raise RuntimeError("Cannot find a .concise results file in directory:%s" % jpred_rundir)
        return os.path.join(jpred_rundir, concise_files[0])

    @staticmethod
    def parse_jpred_output(out_concise):
        """Read the ``jnetpred:`` and ``Lupas_28:`` lines of a .concise file.

        Each matching line has its prefix removed and its comma-separated
        values joined into a single string. If a prefix occurs more than once
        the last occurrence wins.

        Returns
        -------
        tuple
            ``(ss_pred, cc_28)`` - the joined ``jnetpred`` and ``Lupas_28``
            strings.

        Raises
        ------
        RuntimeError
            If either line is missing or empty.
        """
        ss_pred = None
        cc_28 = None
        logger.debug('Parsing JPRED concise output: %s', out_concise)
        with open(out_concise) as f:
            for line in f:
                if line.startswith('Lupas_28:'):
                    cc_28 = "".join(line.strip().replace('Lupas_28:', '').split(","))
                elif line.startswith('jnetpred:'):
                    ss_pred = "".join(line.strip().replace('jnetpred:', '').split(","))
        # Explicit raise rather than assert: asserts vanish under ``python -O``.
        if not (ss_pred and cc_28):
            raise RuntimeError(
                "Cannot find jnetpred and Lupas_28 predictions in: {}".format(out_concise))
        return ss_pred, cc_28

    @staticmethod
    def parse_results_output(output):
        """Parse the path of the downloaded results archive from jpredapi output.

        Example of the expected *output*::

            Your job status will be checked with the following parameters:
            JobId: jp_H_5vG49
            getResults: yes
            checkEvery: 10 [sec]
            Thu Nov 29 15:02:01 2018 ---> Job jp_H_5vG49 finished. Results available at the following URL:
            http://www.compbio.dundee.ac.uk/jpred4/results/jp_H_5vG49/jp_H_5vG49.results.html

            Will attempt to download results now (using 'wget') from:
            http://www.compbio.dundee.ac.uk/jpred4/results/jp_H_5vG49/jp_H_5vG49.tar.gz

            Job results archive is now available at: jp_H_5vG49/jp_H_5vG49.tar.gz

        Returns
        -------
        str
            The ``<dir>/<name>.tar.gz`` path exactly as printed.

        Raises
        ------
        RuntimeError
            If the "archive is now available" line cannot be found.
        """
        mregx = r'Job results archive is now available at: ?(\S+/\S+\.tar\.gz)'
        match = re.search(mregx, output)
        if not match:
            raise RuntimeError("Cannot parse directory path from output: {}".format(output))
        return match.group(1)

    @staticmethod
    def parse_status_url(output):
        """Parse the job id and status URL from jpredapi submission output.

        Example of the expected *output*::

            Your job will be submitted with the following parameters:
            file: ../data/Q13586.fasta
            format: seq
            skipPDB: on
            name: jens_test_job

            Created JPred job with jobid: jp_H_5vG49

            You can check the status of the job using the following URL: http://www.compbio.dundee.ac.uk/jpred4/cgi-bin/chklog?jp_H_5vG49
            ...or using 'perl jpredapi status jobid=jp_H_5vG49 getResults=yes checkEvery=60 silent' command
            (Check documentation for more details.)

        Returns
        -------
        tuple
            ``(jobid, status_url)``.

        Raises
        ------
        RuntimeError
            If the job id / status URL lines cannot be found.
        """
        # ``https?`` accepts both http and https URLs (the previous ``http?``
        # made the final "p" optional and could never match https).
        mregx = (r'Created JPred job with jobid: (\S+)\s+'
                 r'You can check the status of the job using the following URL: ?(https?://\S+)')
        match = re.search(mregx, output)
        if not match:
            raise RuntimeError("Cannot parse jobid and status_url from output: {}".format(output))
        return match.group(1), match.group(2)

    @staticmethod
    def create_annotation(annotation):
        """Wrap a prediction string in a SequenceAnnotation.

        The annotation gets source 'Jpred server', the HELIX and SHEET symbols
        registered on it, and a score of 1.0 for every position.
        """
        ann = SequenceAnnotation()
        ann.source = 'Jpred server'
        ann.annotation = annotation
        ann.library_add_annotation(HELIX)
        ann.library_add_annotation(SHEET)
        ann.scores = [1.0] * len(annotation)
        return ann

    def get_prediction(self, download_tgz=None, jpred_output=None, cleanup=False):
        """Calculate SS using the online JPRED server

        Parameters
        ----------
        download_tgz : str
           A results tar.gz file from the JPRED server - FOR RUNNING UNIT TESTS
        jpred_output : str
           A results file from the JPRED server - FOR RUNNING UNIT TESTS
        cleanup : bool
           Delete the downloaded/unpacked results

        Returns
        -------
        The SequenceAnnotation built from the jnetpred prediction; it is also
        stored on ``self.prediction``.

        Raises
        ------
        RuntimeError
            If the sequence file is missing (only checked when neither
            *download_tgz* nor *jpred_output* is given). Any exception raised
            while running JPred is logged, stored on ``self.exception`` and
            re-raised.
        """
        # Only bound to a real path when an archive is unpacked below; cleanup()
        # ignores None, so cleanup=True is safe when jpred_output was supplied.
        results_directory = None
        if not (download_tgz or jpred_output):
            # Nothing pre-computed was supplied (the arguments exist for
            # testing), so run the real server job.
            if not os.path.isfile(self.seq_info.sequence_file):
                msg = "Cannot find JPRED sequence file: %s" % self.seq_info.sequence_file
                self.exception = msg
                logger.critical(msg)
                raise RuntimeError(msg)
            try:
                download_tgz = self.run_jpred(self.seq_info.sequence_file)
            except Exception as e:
                logger.critical(e)
                self.exception = e
                raise
        if not jpred_output:
            results_directory = self.unpack_results(download_tgz)
            jpred_output = self.find_results_file(results_directory)
        ss_pred, _ = self.parse_jpred_output(jpred_output)
        if cleanup:
            self.cleanup(results_directory)
        self.prediction = self.create_annotation(ss_pred)
        logger.debug("JPred finished prediction at: %s", now())
        return self.prediction

    def run_jpred(self, seqin):
        """Submit *seqin* to JPred and return the path of the results archive."""
        logger.debug("JPred starting prediction at: %s", now())
        jobid = self.submit_job(seqin)
        return self.get_results(jobid)

    def submit_job(self, seqin):
        """Submit the sequence file *seqin* with jpredapi and return the job id."""
        cmd = [self.jpred_script,
               'submit',
               'file=%s' % seqin,
               'mode=single',
               'format=fasta',
               # 'email={}'.format(JPRED_SUBMISSION_EMAIL),  # currently not sent
               'name=ccp4_mrparse_submission',
               'skipPDB=on']
        out = run_cmd(cmd)
        jobid, status_url = self.parse_status_url(out)
        logger.info("*** Submitted JPRED job with id %s - check its progress here: %s", jobid, status_url)
        return jobid

    def get_results(self, jobid):
        """Check results and download from the server

        Returns the absolute path of the results archive reported by jpredapi.
        """
        cmd = [self.jpred_script,
               'status',
               'jobid=%s' % jobid,
               'getResults=yes',
               'checkEvery=10']
        out = run_cmd(cmd)
        download_tgz = os.path.abspath(self.parse_results_output(out))
        logger.debug("JPred results downloaded to: %s", download_tgz)
        return download_tgz

    def unpack_results(self, download_tgz):
        """Extract *download_tgz* into its own directory and return that directory.

        Raises
        ------
        RuntimeError
            If the archive is empty, or if a member name would resolve to a
            location outside the extraction directory.
        """
        job_directory = os.path.dirname(download_tgz)
        with tarfile.open(download_tgz, 'r:*') as tf:
            members = tf.getmembers()
            if not members:
                raise RuntimeError('Empty archive: %s' % download_tgz)
            # The archive comes from a remote server: basic guard against
            # absolute or '..' member names escaping the job directory.
            root = os.path.realpath(job_directory)
            root_prefix = os.path.join(root, '')  # root plus a trailing separator
            for member in members:
                target = os.path.realpath(os.path.join(root, member.name))
                if target != root and not target.startswith(root_prefix):
                    raise RuntimeError(
                        'Unsafe path in archive %s: %s' % (download_tgz, member.name))
            tf.extractall(path=job_directory, members=members)
        logger.debug('Extracted jpred files to: %s', job_directory)
        return job_directory

    def cleanup(self, results_directory):
        """Delete *results_directory* and its contents if it is an existing directory."""
        if results_directory and os.path.isdir(results_directory):
            logger.debug('Removing jpred results directory: %s', results_directory)
            shutil.rmtree(results_directory)