# Source code for mrparse.mr_analyse

import json
import multiprocessing
import os
import subprocess
import sys

from jinja2 import Environment, FileSystemLoader

from mrparse.mr_log import setup_logging
from mrparse.mr_util import make_workdir, now
from mrparse.mr_hkl import HklInfo
from mrparse.mr_search_model import SearchModelFinder
from mrparse.mr_sequence import Sequence, MultipleSequenceException, merge_multiple_sequences
from mrparse.mr_classify import MrClassifier
from mrparse.mr_version import __version__

# Absolute path of the directory containing this module; the HTML resources
# below are located relative to it.
THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# Directory holding the HTML assets; also handed to the template as
# ``mrparse_html_dir`` by write_output_files().
HTML_DIR = os.path.join(THIS_DIR, 'html')
# Jinja2 template that write_output_files() renders into the HTML report.
HTML_TEMPLATE = os.path.join(HTML_DIR, 'mrparse.html.jinja2')
# Output file names. write_output_files() passes each through os.path.abspath(),
# so they are created in whatever the current working directory is at that time.
HTML_OUT = 'mrparse.html'
HOMOLOGS_JS = 'homologs.json'
MODELS_JS = 'models.json'

# Module-wide logger. Stays None until run() rebinds it to the value returned by
# setup_logging(); the other functions in this module use it via the global name.
logger = None


def _open_in_browser(html_out):
    """Try to display *html_out* with the platform's desktop opener.

    Only Linux (``xdg-open``) and macOS (``open``) are handled; on any other
    platform nothing happens. A failure to launch the opener (for example
    ``xdg-open`` not being installed on a headless machine) is logged and
    otherwise ignored, because the report has already been written by the
    time this is called.
    """
    platform = sys.platform.lower()
    if platform.startswith('linux'):
        opencmd = 'xdg-open'
    elif platform.startswith('darwin'):
        opencmd = 'open'
    else:
        return
    try:
        subprocess.Popen([opencmd, html_out])
    except OSError as e:
        logger.warning("Could not open %s with %s: %s", html_out, opencmd, e)


def run(seqin, hklin=None, run_serial=False, do_classify=True, pdb_dir=None, phmmer_dblvl=None, search_engine=None,
        tmhmm_exe=None, deepcoil_exe=None, hhsearch_exe=None, hhsearch_db=None, ccp4cloud=None):
    """Run a complete MrParse analysis and write the HTML report.

    Parameters
    ----------
    seqin : str
       Path to the input sequence file (required)
    hklin : str, optional
       Path to a reflection file; when given an HklInfo object is created for it
    run_serial : bool
       Run the analysis steps one after another instead of in a process pool
    do_classify : bool
       Create and run an MrClassifier for the sequence
    pdb_dir, phmmer_dblvl : optional
       Passed through unchanged to SearchModelFinder
    search_engine : str, optional
       Passed to SearchModelFinder; the value "hhsearch" additionally requires
       both ``hhsearch_exe`` and ``hhsearch_db``
    tmhmm_exe, deepcoil_exe : optional
       Passed through unchanged to MrClassifier
    hhsearch_exe, hhsearch_db : optional
       Passed through unchanged to SearchModelFinder
    ccp4cloud : optional
       Passed to write_output_files(); when truthy the report is not opened
       in a browser

    Returns
    -------
    int
       Always 0

    Raises
    ------
    RuntimeError
       If ``seqin`` or ``hklin`` is not an existing file, or if
       ``search_engine`` is "hhsearch" without ``hhsearch_exe``/``hhsearch_db``
    """
    # Resolve the input paths while the caller's working directory is still
    # current: the chdir() below would otherwise make relative paths point
    # inside the work directory and the isfile() checks would fail.
    if seqin:
        seqin = os.path.abspath(seqin)
    if hklin:
        hklin = os.path.abspath(hklin)

    # Need to make a work directory first as all logs go into there
    work_dir = make_workdir()
    os.chdir(work_dir)
    global logger
    logger = setup_logging()
    program_name = os.path.basename(sys.argv[0])
    logger.info("Running: %s", program_name)
    logger.info("Version: %s", __version__)
    logger.info("Program started at: %s", now())
    logger.info("Running from directory: %s", work_dir)

    if not (seqin and os.path.isfile(seqin)):
        raise RuntimeError("Cannot find seqin file: %s" % seqin)
    logger.info("Running with seqin %s", seqin)

    try:
        seq_info = Sequence(seqin)
    except MultipleSequenceException:
        logger.info("Multiple sequences found seqin: %s\n\nAttempting to merge sequences", seqin)
        seq_info = merge_multiple_sequences(seqin)
        logger.info("Merged sequence file: %s", seq_info.sequence_file)

    hkl_info = None
    if hklin:
        if not os.path.isfile(hklin):
            raise RuntimeError("Cannot find hklin file: %s" % hklin)
        logger.info("Running with hklin %s", hklin)
        hkl_info = HklInfo(hklin, seq_info=seq_info)

    if search_engine == "hhsearch":
        if not hhsearch_exe:
            raise RuntimeError("HHSearch executable needs to be defined with --hhsearch_exe")
        if not hhsearch_db:
            raise RuntimeError("HHSearch database needs to be defined with --hhsearch_db")

    search_model_finder = SearchModelFinder(seq_info, hkl_info=hkl_info, pdb_dir=pdb_dir, phmmer_dblvl=phmmer_dblvl,
                                            search_engine=search_engine, hhsearch_exe=hhsearch_exe,
                                            hhsearch_db=hhsearch_db)

    classifier = None
    if do_classify:
        classifier = MrClassifier(seq_info=seq_info, tmhmm_exe=tmhmm_exe, deepcoil_exe=deepcoil_exe)

    if run_serial:
        # The serial runner calls the objects in this process, so the local
        # names already refer to whatever state those calls left behind.
        run_analyse_serial(search_model_finder, classifier, hkl_info, do_classify)
    else:
        # The parallel runner executes in worker processes and hands back the
        # values they produced, so the local names must be rebound.
        search_model_finder, classifier, hkl_info = run_analyse_parallel(search_model_finder,
                                                                         classifier,
                                                                         hkl_info,
                                                                         do_classify)

    html_out = write_output_files(search_model_finder, hkl_info=hkl_info, classifier=classifier, ccp4cloud=ccp4cloud)
    logger.info("Wrote MrParse output file: %s", html_out)

    if not ccp4cloud:
        _open_in_browser(html_out)
    return 0


def _run_logged(name, stage):
    """Call *stage* with no arguments, logging instead of propagating any failure.

    *name* is only used to label the log message.
    """
    try:
        stage()
    except Exception as e:
        logger.critical('%s failed: %s', name, e)
        # exc_info=True attaches the exception currently being handled
        logger.debug("Traceback is:", exc_info=True)


def run_analyse_serial(search_model_finder, classifier, hkl_info, do_classify):
    """Run the analysis steps one after another in the current process.

    Each step is invoked by calling the object with no arguments. An exception
    raised by one step is logged (critical message plus a debug traceback) and
    does not prevent the remaining steps from running.

    Parameters
    ----------
    search_model_finder : callable
       Always run first
    classifier : callable
       Run second, but only when ``do_classify`` is true
    hkl_info : callable or None
       Run last, but only when truthy
    do_classify : bool
       Whether ``classifier`` should be run
    """
    _run_logged('SearchModelFinder', search_model_finder)
    if do_classify:
        _run_logged('MrClassifier', classifier)
    if hkl_info:
        _run_logged('HklInfo', hkl_info)


def _result_or_original(name, async_result, original):
    """Return ``async_result.get()``; if that raises, log it and return *original*.

    *name* is only used to label the log message.
    """
    try:
        return async_result.get()
    except Exception as e:
        logger.critical('%s failed: %s', name, e)
        # exc_info=True attaches the exception currently being handled
        logger.debug("Traceback is:", exc_info=True)
        return original


def run_analyse_parallel(search_model_finder, classifier, hkl_info, do_classify):
    """Run the analysis steps concurrently in a multiprocessing pool.

    Each step is submitted to the pool as a zero-argument call. Because the
    calls execute in worker processes, the objects passed in are not modified
    in this process; the values returned by the workers are handed back
    instead (presumably each callable returns itself -- confirm against the
    SearchModelFinder / MrClassifier / HklInfo implementations).

    Parameters
    ----------
    search_model_finder : callable
       Always submitted
    classifier : callable
       Submitted only when ``do_classify`` is true
    hkl_info : callable or None
       Submitted only when truthy
    do_classify : bool
       Whether ``classifier`` should be submitted

    Returns
    -------
    tuple
       ``(search_model_finder, classifier, hkl_info)`` where every step that
       was submitted and succeeded is replaced by its worker's return value.
       A step that failed (the failure is logged) or was not submitted is
       returned exactly as it was passed in.
    """
    # One worker per job that will actually be submitted.
    nproc = 1 + bool(do_classify) + bool(hkl_info)
    logger.info("Running on %d processors.", nproc)
    pool = multiprocessing.Pool(nproc)
    try:
        smf_result = pool.apply_async(search_model_finder)
        mrc_result = pool.apply_async(classifier) if do_classify else None
        hklin_result = pool.apply_async(hkl_info) if hkl_info else None
        pool.close()
        logger.debug("Pool waiting")
        pool.join()
        logger.debug("Pool finished")
    finally:
        # No-op after a clean close()/join(); releases the workers if
        # anything above raised.
        pool.terminate()

    search_model_finder = _result_or_original('SearchModelFinder', smf_result, search_model_finder)
    if mrc_result is not None:
        classifier = _result_or_original('MrClassifier', mrc_result, classifier)
    if hklin_result is not None:
        hkl_info = _result_or_original('HklInfo', hklin_result, hkl_info)
    return search_model_finder, classifier, hkl_info


def _export_hits(fetch_plain, fetch_with_graphics, json_name, ccp4cloud, not_found_msg):
    """Write one kind of search hit to a JSON file and return its report form.

    Parameters
    ----------
    fetch_plain : callable
       Returns the hits in the form that is dumped to ``json_name``
    fetch_with_graphics : callable
       Returns the hits in the form that is embedded in the HTML report
    json_name : str
       Name of the JSON file, resolved against the current working directory
    ccp4cloud : optional
       When truthy the 'pdb_file' entry is removed from every returned hit
    not_found_msg : str
       Debug message logged when a RuntimeError is raised

    Returns
    -------
    The value of ``fetch_with_graphics()``, or an empty dict if a RuntimeError
    was raised before it could be obtained.
    """
    hits_pfam = {}
    try:
        hits = fetch_plain()
        with open(os.path.abspath(json_name), 'w') as w:
            w.write(json.dumps(hits))
        hits_pfam = fetch_with_graphics()
        if ccp4cloud:
            for hit in hits_pfam:
                # pop() rather than del: a hit that has no 'pdb_file' entry
                # needs no stripping and must not abort report generation.
                hit.pop('pdb_file', None)
    except RuntimeError:
        logger.debug(not_found_msg)
    return hits_pfam


def write_output_files(search_model_finder, hkl_info=None, classifier=None, ccp4cloud=None):
    """Write the JSON result files and the HTML report into the current directory.

    The homologs and models are each dumped to their own JSON file
    (``HOMOLOGS_JS`` / ``MODELS_JS``) and, together with the optional
    classifier and reflection-data results, serialised into the JSON string
    that is embedded in the rendered ``HTML_TEMPLATE``.

    Parameters
    ----------
    search_model_finder : SearchModelFinder
       Source of the homolog and model results
    hkl_info : HklInfo, optional
       When truthy its ``as_dict()`` is stored under 'hkl_info'
    classifier : MrClassifier, optional
       When truthy its ``pfam_dict()`` is merged into the 'pfam' section
    ccp4cloud : optional
       When truthy the 'pdb_file' entry of every homolog/model and the
       'hklin' entry of the reflection data are left out of the report

    Returns
    -------
    str
       Absolute path of the HTML file that was written
    """
    # write out homologs for CCP4cloud
    # TODO: separate the storing of homologs from the PFAM directives
    homologs_pfam = _export_hits(search_model_finder.homologs_as_dicts,
                                 search_model_finder.homologs_with_graphics,
                                 HOMOLOGS_JS, ccp4cloud, 'No homologues found')
    models_pfam = _export_hits(search_model_finder.models_as_dicts,
                               search_model_finder.models_with_graphics,
                               MODELS_JS, ccp4cloud, 'No models found')

    results_dict = {'pfam': {'homologs': homologs_pfam, 'models': models_pfam}}
    if classifier:
        results_dict['pfam'].update(classifier.pfam_dict())
    if hkl_info:
        hkl_dict = hkl_info.as_dict()
        if ccp4cloud:
            hkl_dict.pop('hklin', None)
        results_dict['hkl_info'] = hkl_dict
    results_json = json.dumps(results_dict)

    html_out = os.path.abspath(HTML_OUT)
    render_template(HTML_TEMPLATE, html_out,
                    # kwargs appear as variables in the template
                    mrparse_html_dir=HTML_DIR,
                    results_json=results_json,
                    version=__version__)
    return html_out


def render_template(in_file_path, out_file_path, **kwargs):
    """
    Render a Jinja2 template file and write the result to disk.

    The template is looked up by file name inside its own directory, so any
    templates it refers to are searched for in that same directory.

    Parameters
    ----------
    in_file_path : str
       Location of the template file
    out_file_path : str
       Where the rendered text is written (opened in "w" mode)
    **kwargs : dict
       Made available to the template as variables
    """
    template_dir, template_name = os.path.split(in_file_path)
    loader = FileSystemLoader(template_dir)
    environment = Environment(loader=loader, keep_trailing_newline=True)
    rendered = environment.get_template(template_name).render(**kwargs)
    with open(out_file_path, "w") as out_handle:
        out_handle.write(rendered)