Source code for mrparse.mr_search_model
"""
Created on 18 Oct 2018
@author: jmht
"""
import logging
from mrparse import mr_homolog
from mrparse import mr_alphafold
from mrparse import mr_hit
from mrparse.mr_region import RegionFinder
from mrparse import mr_pfam
from mrparse.mr_util import now
logger = logging.getLogger(__name__)
class SearchModelFinder(object):
    """Locate search models for a target sequence.

    Two independent searches are run: the user-configured search whose hits
    become "homologs", and a fixed phmmer search against the "af2" database
    level whose hits become "models". Each search yields hits, regions
    derived from those hits, and finally the prepared homolog/model objects.

    Instances are callable (see ``__call__``) so that the whole pipeline can
    be submitted to a multiprocessing pool as a picklable object.
    """

    def __init__(self, seq_info, hkl_info=None, pdb_dir=None, search_engine=None, phmmer_dblvl=None,
                 hhsearch_exe=None, hhsearch_db=None):
        """Store the search configuration; no searching is done here.

        :param seq_info: target sequence description, forwarded to
            ``mr_hit.find_hits``; its ``nresidues`` attribute is read by the
            ``*_with_graphics`` methods.
        :param hkl_info: optional reflection data; when truthy,
            ``mr_homolog.calculate_ellg`` is run on the homologs.
        :param pdb_dir: forwarded to ``mr_homolog.homologs_from_hits``.
        :param search_engine: forwarded to ``mr_hit.find_hits`` for the homolog search.
        :param phmmer_dblvl: forwarded to ``mr_hit.find_hits`` for the homolog search.
        :param hhsearch_exe: forwarded to ``mr_hit.find_hits`` for the homolog search.
        :param hhsearch_db: forwarded to ``mr_hit.find_hits`` for the homolog search.
        """
        self.seq_info = seq_info
        self.hkl_info = hkl_info
        self.pdb_dir = pdb_dir
        self.search_engine = search_engine
        self.phmmer_dblvl = phmmer_dblvl
        self.hhsearch_exe = hhsearch_exe
        self.hhsearch_db = hhsearch_db
        # Results, populated by the find_*/prepare_* methods below.
        self.hits = None
        self.model_hits = None
        self.regions = None
        self.model_regions = None
        self.homologs = None
        self.models = None

    def __call__(self):
        """Run the full pipeline and return ``self``.

        Required so that we can use multiprocessing pool. We need to be able to pickle the object passed
        to the pool and instance methods don't work, so we add the object to the pool and define __call__
        https://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-multiprocessing-pool-map/6975654#6975654
        """
        # Lazy %-style arguments: the message is only formatted if DEBUG is enabled.
        logger.debug('SearchModelFinder started at %s', now())
        self.find_homolog_regions()
        logger.debug('SearchModelFinder homolog regions done at %s', now())
        self.prepare_homologs()
        logger.debug('SearchModelFinder homologs done at %s', now())
        self.find_model_regions()
        logger.debug('SearchModelFinder model regions done at %s', now())
        self.prepare_models()
        logger.debug('SearchModelFinder models done at %s', now())
        return self

    def find_homolog_regions(self):
        """Search for homolog hits and derive regions from them.

        :returns: the regions found, or ``None`` (after logging a critical
            message) when the search produced no hits.
        """
        self.hits = mr_hit.find_hits(self.seq_info, search_engine=self.search_engine,
                                     hhsearch_exe=self.hhsearch_exe, hhsearch_db=self.hhsearch_db,
                                     phmmer_dblvl=self.phmmer_dblvl)
        if not self.hits:
            logger.critical('SearchModelFinder PDB search could not find any hits!')
            return None
        self.regions = RegionFinder().find_regions_from_hits(self.hits)
        return self.regions

    def find_model_regions(self):
        """Search for model hits and derive regions from them.

        The search engine is always phmmer with database level "af2",
        regardless of the engine configured on this instance.

        :returns: the model regions found, or ``None`` (after logging a
            critical message) when the search produced no hits.
        """
        self.model_hits = mr_hit.find_hits(self.seq_info, search_engine="phmmer",
                                           hhsearch_exe=None, hhsearch_db=None, phmmer_dblvl="af2")
        if not self.model_hits:
            logger.critical('SearchModelFinder EBI Alphafold database search could not find any hits!')
            return None
        self.model_regions = RegionFinder().find_regions_from_hits(self.model_hits)
        return self.model_regions

    def prepare_homologs(self):
        """Build homologs from the stored hits.

        :returns: the homologs, or ``None`` when there are no hits or no
            regions to work from (``self.homologs`` is then left unchanged).
        """
        # Parenthesised on purpose: ``not a and b`` parses as ``(not a) and b``,
        # which let an empty search fall through to homologs_from_hits(None, ...).
        if not (self.hits and self.regions):
            return None
        self.homologs = mr_homolog.homologs_from_hits(self.hits, self.pdb_dir)
        if self.hkl_info:
            mr_homolog.calculate_ellg(self.homologs, self.hkl_info)
        return self.homologs

    def prepare_models(self):
        """Build models from the stored model hits.

        :returns: the models, or ``None`` when there are no model hits or no
            model regions to work from (``self.models`` is then left unchanged).
        """
        # Same precedence fix as in prepare_homologs.
        if not (self.model_hits and self.model_regions):
            return None
        self.models = mr_alphafold.models_from_hits(self.model_hits)
        return self.models

    def homologs_as_dicts(self):
        """Return a list of per homolog dictionaries serializable to JSON

        :raises RuntimeError: if no regions have been generated.
        """
        if not (self.regions and len(self.regions)):
            raise RuntimeError("No regions generated by SearchModelFinder")
        return [h.static_dict for h in self.homologs.values()]

    def models_as_dicts(self):
        """Return a list of per model dictionaries serializable to JSON

        Only the 20 entries with the highest ``sum_plddt`` are returned,
        ordered from highest to lowest.

        :raises RuntimeError: if no model regions have been generated.
        """
        if not (self.model_regions and len(self.model_regions)):
            raise RuntimeError("No regions generated by SearchModelFinder")
        ranked = sorted((m.static_dict for m in self.models.values()),
                        key=lambda k: k['sum_plddt'], reverse=True)
        return ranked[:20]

    def homologs_with_graphics(self):
        """List of homologs including PFAM graphics directives

        This needs to be done better - the PFAM graphics shouldn't be stored in the
        list of homologs - this was just done because it made development quicker.
        The list of homologs and PFAM graphics needs to be kept separate

        :raises RuntimeError: if no regions have been generated.
        """
        if not (self.regions and len(self.regions)):
            raise RuntimeError("No regions generated by SearchModelFinder")
        mr_pfam.add_pfam_dict_to_homologs(self.homologs, self.seq_info.nresidues)
        return self.homologs_as_dicts()

    def models_with_graphics(self):
        """List of models including PFAM graphics directives

        This needs to be done better - the PFAM graphics shouldn't be stored in the
        list of models - this was just done because it made development quicker.
        The list of models and PFAM graphics needs to be kept separate

        :raises RuntimeError: if no model regions have been generated.
        """
        if not (self.model_regions and len(self.model_regions)):
            raise RuntimeError("No regions generated by SearchModelFinder")
        mr_pfam.add_pfam_dict_to_models(self.models, self.seq_info.nresidues)
        return self.models_as_dicts()